From 207dfbcac95e2479e9dd191fd4fcb680230a33f9 Mon Sep 17 00:00:00 2001 From: aka paul <50n50@noreply.localhost> Date: Sat, 14 Mar 2026 11:30:24 +0000 Subject: [PATCH] Update lncrawler/lncrawler.js --- lncrawler/lncrawler.js | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/lncrawler/lncrawler.js b/lncrawler/lncrawler.js index 59984b2..9b67aaa 100644 --- a/lncrawler/lncrawler.js +++ b/lncrawler/lncrawler.js @@ -35,8 +35,6 @@ async function searchResults(keyword) { } } -console.log(extractText("https://lncrawler.monster/novels/kokoro-connect-hito-random/lncrawler/chapter/100")); - async function extractDetails(slug) { try { const response = await soraFetch(`https://api.lncrawler.monster/novels/${slug}/`); @@ -120,35 +118,49 @@ async function extractChapters(slug) { } async function extractText(url) { + console.log(`[extractText] Starting extraction for URL: ${url}`); + try { const requestUrl = new URL(url); + console.log(`[extractText] Parsed URL — hostname: ${requestUrl.hostname}, pathname: ${requestUrl.pathname}`); + if (requestUrl.hostname === "lncrawler.monster") { requestUrl.hostname = "api.lncrawler.monster"; + console.log(`[extractText] Redirected to API subdomain: ${requestUrl.toString()}`); } + console.log(`[extractText] Fetching: ${requestUrl.toString()}`); const response = await soraFetch(requestUrl.toString(), { - headers: { - Accept: "application/json, text/plain, */*" - } + headers: { Accept: "application/json, text/plain, */*" } }); if (!response) { + console.warn(`[extractText] No response received for URL: ${requestUrl.toString()}`); throw new Error("No response received"); } + console.log(`[extractText] Response received — status: ${response.status}, ok: ${response.ok}`); const payload = await response.json(); + console.log(`[extractText] Payload parsed — keys: [${Object.keys(payload ?? {}).join(", ")}]`); + console.log(`[extractText] images_path: ${payload?.images_path ?? "N/A"}, body length: ${payload?.body?.length ?? 0} chars`); + let content = cleanChapterBody(payload?.body); + console.log(`[extractText] After cleanChapterBody — content length: ${content?.length ?? 0} chars`); + content = normalizeChapterImageUrls(content, payload?.images_path, requestUrl.origin); + console.log(`[extractText] After normalizeChapterImageUrls — content length: ${content?.length ?? 0} chars`); if (!content) { + console.warn(`[extractText] Content is empty after processing — body was: ${JSON.stringify(payload?.body)?.slice(0, 200)}`); throw new Error("Chapter body not found"); } + console.log(`[extractText] Extraction successful — final content length: ${content.length} chars`); console.log(content); return content; - + } catch (error) { - console.log("Fetch error in extractText: " + error); + console.error(`[extractText] Error during extraction for URL "${url}": ${error.message}`, error); return '
Error extracting text
'; } }