Update lncrawler/lncrawler.js

This commit is contained in:
aka paul
2026-03-14 11:34:14 +00:00
parent 207dfbcac9
commit a32db404cd
+7 -23
View File
@@ -118,49 +118,33 @@ async function extractChapters(slug) {
} }
async function extractText(url) { async function extractText(url) {
console.log(`[extractText] Starting extraction for URL: ${url}`);
try { try {
const requestUrl = new URL(url); let requestUrl = url;
console.log(`[extractText] Parsed URL — hostname: ${requestUrl.hostname}, pathname: ${requestUrl.pathname}`); if (url.includes("lncrawler.monster") && !url.includes("api.lncrawler.monster")) {
requestUrl = url.replace("lncrawler.monster", "api.lncrawler.monster");
if (requestUrl.hostname === "lncrawler.monster") {
requestUrl.hostname = "api.lncrawler.monster";
console.log(`[extractText] Redirected to API subdomain: ${requestUrl.toString()}`);
} }
console.log(`[extractText] Fetching: ${requestUrl.toString()}`); const response = await soraFetch(requestUrl, {
const response = await soraFetch(requestUrl.toString(), {
headers: { Accept: "application/json, text/plain, */*" } headers: { Accept: "application/json, text/plain, */*" }
}); });
if (!response) { if (!response) {
console.warn(`[extractText] No response received for URL: ${requestUrl.toString()}`);
throw new Error("No response received"); throw new Error("No response received");
} }
console.log(`[extractText] Response received — status: ${response.status}, ok: ${response.ok}`);
const payload = await response.json(); const payload = await response.json();
console.log(`[extractText] Payload parsed — keys: [${Object.keys(payload ?? {}).join(", ")}]`); const origin = requestUrl.split("/").slice(0, 3).join("/");
console.log(`[extractText] images_path: ${payload?.images_path ?? "N/A"}, body length: ${payload?.body?.length ?? 0} chars`);
let content = cleanChapterBody(payload?.body); let content = cleanChapterBody(payload?.body);
console.log(`[extractText] After cleanChapterBody — content length: ${content?.length ?? 0} chars`); content = normalizeChapterImageUrls(content, payload?.images_path, origin);
content = normalizeChapterImageUrls(content, payload?.images_path, requestUrl.origin);
console.log(`[extractText] After normalizeChapterImageUrls — content length: ${content?.length ?? 0} chars`);
if (!content) { if (!content) {
console.warn(`[extractText] Content is empty after processing — body was: ${JSON.stringify(payload?.body)?.slice(0, 200)}`);
throw new Error("Chapter body not found"); throw new Error("Chapter body not found");
} }
console.log(`[extractText] Extraction successful — final content length: ${content.length} chars`);
console.log(content);
return content; return content;
} catch (error) { } catch (error) {
console.error(`[extractText] Error during extraction for URL "${url}": ${error.message}`, error); console.log("Fetch error in extractText: " + error);
return '<p>Error extracting text</p>'; return '<p>Error extracting text</p>';
} }
} }