diff --git a/lncrawler/lncrawler.js b/lncrawler/lncrawler.js index 9b67aaa..e3c9b44 100644 --- a/lncrawler/lncrawler.js +++ b/lncrawler/lncrawler.js @@ -118,49 +118,33 @@ async function extractChapters(slug) { } async function extractText(url) { - console.log(`[extractText] Starting extraction for URL: ${url}`); - try { - const requestUrl = new URL(url); - console.log(`[extractText] Parsed URL — hostname: ${requestUrl.hostname}, pathname: ${requestUrl.pathname}`); - - if (requestUrl.hostname === "lncrawler.monster") { - requestUrl.hostname = "api.lncrawler.monster"; - console.log(`[extractText] Redirected to API subdomain: ${requestUrl.toString()}`); + let requestUrl = url; + if (url.includes("lncrawler.monster") && !url.includes("api.lncrawler.monster")) { + requestUrl = url.replace("lncrawler.monster", "api.lncrawler.monster"); } - console.log(`[extractText] Fetching: ${requestUrl.toString()}`); - const response = await soraFetch(requestUrl.toString(), { + const response = await soraFetch(requestUrl, { headers: { Accept: "application/json, text/plain, */*" } }); if (!response) { - console.warn(`[extractText] No response received for URL: ${requestUrl.toString()}`); throw new Error("No response received"); } - console.log(`[extractText] Response received — status: ${response.status}, ok: ${response.ok}`); const payload = await response.json(); - console.log(`[extractText] Payload parsed — keys: [${Object.keys(payload ?? {}).join(", ")}]`); - console.log(`[extractText] images_path: ${payload?.images_path ?? "N/A"}, body length: ${payload?.body?.length ?? 0} chars`); + const origin = requestUrl.split("/").slice(0, 3).join("/"); let content = cleanChapterBody(payload?.body); - console.log(`[extractText] After cleanChapterBody — content length: ${content?.length ?? 0} chars`); - - content = normalizeChapterImageUrls(content, payload?.images_path, requestUrl.origin); - console.log(`[extractText] After normalizeChapterImageUrls — content length: ${content?.length ?? 0} chars`); + content = normalizeChapterImageUrls(content, payload?.images_path, origin); if (!content) { - console.warn(`[extractText] Content is empty after processing — body was: ${JSON.stringify(payload?.body)?.slice(0, 200)}`); throw new Error("Chapter body not found"); } - console.log(`[extractText] Extraction successful — final content length: ${content.length} chars`); - console.log(content); return content; - } catch (error) { - console.error(`[extractText] Error during extraction for URL "${url}": ${error.message}`, error); + console.log("Fetch error in extractText: " + error); return '

Error extracting text

'; } }