Update lncrawler/lncrawler.js
This commit is contained in:
+18
-6
@@ -35,8 +35,6 @@ async function searchResults(keyword) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(extractText("https://lncrawler.monster/novels/kokoro-connect-hito-random/lncrawler/chapter/100"));
|
|
||||||
|
|
||||||
async function extractDetails(slug) {
|
async function extractDetails(slug) {
|
||||||
try {
|
try {
|
||||||
const response = await soraFetch(`https://api.lncrawler.monster/novels/${slug}/`);
|
const response = await soraFetch(`https://api.lncrawler.monster/novels/${slug}/`);
|
||||||
@@ -120,35 +118,49 @@ async function extractChapters(slug) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Fetches a chapter payload and returns its processed body.
 *
 * When the URL's hostname is exactly "lncrawler.monster", the request is
 * redirected to the "api.lncrawler.monster" subdomain; any other hostname is
 * requested unchanged. The response is parsed as JSON, its `body` field is
 * passed through `cleanChapterBody`, and the result through
 * `normalizeChapterImageUrls` together with the payload's `images_path` and
 * the request origin.
 *
 * This function never rejects: every failure (unparseable URL, missing
 * response, JSON parse failure, empty content) is logged and replaced by a
 * fixed fallback string.
 *
 * @param {string} url - Absolute chapter URL.
 * @returns {Promise<string>} The processed chapter content, or
 *   '<p>Error extracting text</p>' on any failure.
 */
async function extractText(url) {
    console.log(`[extractText] Starting extraction for URL: ${url}`);

    try {
        const requestUrl = new URL(url);
        console.log(`[extractText] Parsed URL — hostname: ${requestUrl.hostname}, pathname: ${requestUrl.pathname}`);

        if (requestUrl.hostname === "lncrawler.monster") {
            requestUrl.hostname = "api.lncrawler.monster";
            console.log(`[extractText] Redirected to API subdomain: ${requestUrl.toString()}`);
        }

        console.log(`[extractText] Fetching: ${requestUrl.toString()}`);
        const response = await soraFetch(requestUrl.toString(), {
            headers: { Accept: "application/json, text/plain, */*" }
        });

        if (!response) {
            console.warn(`[extractText] No response received for URL: ${requestUrl.toString()}`);
            throw new Error("No response received");
        }
        // NOTE(review): status/ok are only logged, not enforced — the shape of
        // the object returned by soraFetch is not visible here, so these
        // fields may be undefined. Confirm before gating on them.
        console.log(`[extractText] Response received — status: ${response.status}, ok: ${response.ok}`);

        const payload = await response.json();
        console.log(`[extractText] Payload parsed — keys: [${Object.keys(payload ?? {}).join(", ")}]`);
        console.log(`[extractText] images_path: ${payload?.images_path ?? "N/A"}, body length: ${payload?.body?.length ?? 0} chars`);

        let content = cleanChapterBody(payload?.body);
        console.log(`[extractText] After cleanChapterBody — content length: ${content?.length ?? 0} chars`);

        content = normalizeChapterImageUrls(content, payload?.images_path, requestUrl.origin);
        console.log(`[extractText] After normalizeChapterImageUrls — content length: ${content?.length ?? 0} chars`);

        if (!content) {
            console.warn(`[extractText] Content is empty after processing — body was: ${JSON.stringify(payload?.body)?.slice(0, 200)}`);
            throw new Error("Chapter body not found");
        }

        console.log(`[extractText] Extraction successful — final content length: ${content.length} chars`);
        console.log(content);
        return content;
    } catch (error) {
        // A rejection value is not guaranteed to be an Error (it may even be
        // null/undefined); guard the property read so the handler itself
        // cannot throw and the fallback below is always returned.
        const reason = String(error?.message ?? error);
        console.error(`[extractText] Error during extraction for URL "${url}": ${reason}`, error);
        return '<p>Error extracting text</p>';
    }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user