fix: Update s.to source to support new site structure and improve episode extraction

2026-01-29 22:44:03 +01:00
parent 7824ca4508
commit 484bd2dbc2
4 changed files with 402 additions and 284 deletions
@@ -5,19 +5,71 @@
 async function searchResults(keyword) {
    try {
        const encodedKeyword = encodeURIComponent(keyword);
-        const searchApiUrl = `https://s.to/ajax/seriesSearch?keyword=${encodedKeyword}`;
+        const searchApiUrl = `https://s.to/suche?term=${encodedKeyword}`;
        const response = await soraFetch(searchApiUrl);
-        const responseText = await response?.text() ?? response;
+        const text = response.text ? await response.text() : await response;

-        const data = await JSON.parse(responseText);
+        // parse html
+        /* <div class="col-6 col-md-4 col-lg-2">
+            <a href="https://s.to/serie/pluribus-glck-ist-ansteckend" class="text-decoration-none">
+                <div class="card cover-card h-100 border-0 shadow-sm">
+                    <a href="https://s.to/serie/pluribus-glck-ist-ansteckend" class="d-block show-cover">
+                        <picture>

-        const transformedResults = data.map(serie => ({
-            title: serie.name,
-            image: `https://s.to${serie.cover}`,
-            href: `https://s.to/serie/stream/${serie.link}`
-        }));
+                            <source type="image/avif" srcset="
+      https://s.to/media/images/channel/mobile/pluribus-glck-ist-ansteckend-tombd2cv?format=avif         375w,
+      https://s.to/media/images/channel/tablet/pluribus-glck-ist-ansteckend-tombd2cv?format=avif         768w,
+      https://s.to/media/images/channel/desktop/pluribus-glck-ist-ansteckend-tombd2cv?format=avif       1024w
+    " sizes="(max-width: 600px) 375px,
+           (max-width: 992px) 768px,
+           1024px">

-        return JSON.stringify(transformedResults);
+
+                            <source type="image/webp" srcset="
+      https://s.to/media/images/channel/mobile/pluribus-glck-ist-ansteckend-tombd2cv?format=webp         375w,
+      https://s.to/media/images/channel/tablet/pluribus-glck-ist-ansteckend-tombd2cv?format=webp         768w,
+      https://s.to/media/images/channel/desktop/pluribus-glck-ist-ansteckend-tombd2cv?format=webp       1024w
+    " sizes="(max-width: 600px) 375px,
+           (max-width: 992px) 768px,
+           1024px">
+
+
+                            <img src="https://s.to/media/images/channel/desktop/pluribus-glck-ist-ansteckend-tombd2cv?format=jpg"
+                                srcset="
+      https://s.to/media/images/channel/desktop/pluribus-glck-ist-ansteckend-tombd2cv?format=jpg      1x,
+      https://s.to/media/images/channel/2x-desktop/pluribus-glck-ist-ansteckend-tombd2cv?format=jpg   2x
+    " alt="Pluribus - Glück ist ansteckend" class="img-fluid w-100">
+                        </picture>
+
+
+
+                    </a>
+                    <div class="card-body py-2 p-1">
+                        <h6 class="show-title mb-0 small" title="Pluribus - Glück ist ansteckend">Pluribus - Glück ist
+                            ansteckend</h6>
+                    </div>
+                </div>
+            </a>
+        </div>
+        ...
+        */
+      const searchRegex = /<a\s+href="([^"]+)"\s+class="d-block\s+show-cover"[\s\S]*?<img\s+src="([^"]+)"[\s\S]*?class="show-title[^"]*"[^>]*>([\s\S]*?)<\//g;
+      const results = [];
+      let match;
+      while ((match = searchRegex.exec(text)) !== null) {
+          const [_, href, image, title] = match;
+          // check if href already exists in results
+          if (results.some(result => result.href === href.trim())) {
+              continue;
+          }
+       
+          results.push({ title: title.trim(), image: image.trim(), href: href.trim() });
+      }
+
+
+
+        console.log("Search Results: " + JSON.stringify(results));
+        return JSON.stringify(results);

    } catch (error) {
        sendLog('Fetch error:' + error);
@@ -29,31 +81,49 @@ async function extractDetails(url) {
    try {
        const fetchUrl = `${url}`;
        const response = await soraFetch(fetchUrl);
-        const text = response.text ? await response.text() : response;
+        const text = response.text ? await response.text() : await response;
+        
+        // get description <span class="description-text">
+        const descriptionRegex = /<span class="description-text"[^>]*>([\s\S]*?)<\/span>/;
+        const descriptionMatch = descriptionRegex.exec(text);
+        let description = descriptionMatch ? descriptionMatch[1].trim() : 'No description available';
+        description = description.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&').replace(/&quot;/g, '"').replace(/&#39;/g, "'");

-        const descriptionRegex = /<p\s+class="seri_des"\s+itemprop="accessibilitySummary"\s+data-description-type="review"\s+data-full-description="([^"]*)".*?>(.*?)<\/p>/s;
-        const aliasesRegex = /<h1\b[^>]*\bdata-alternativetitles="([^"]+)"[^>]*>/i;
+        // get year (airdate) <a class="small text-muted" href="https://s.to/jahr/2025">2025</a>
+        const yearRegex = /<a class="small text-muted" href="https:\/\/s\.to\/jahr\/\d{4}">(\d{4})<\/a>/;
+        const yearMatch = yearRegex.exec(text);
+        const airdateMatch = yearMatch ? `${yearMatch[1]}` : 'Unknown Year';

-        const aliasesMatch = aliasesRegex.exec(text);
-        let aliasesArray = [];
-        if (aliasesMatch) {
-            aliasesArray = aliasesMatch[1].split(',').map(a => a.trim());
+
+        // get genres              
+        /* <li class="series-group">
+                    <strong class="me-1">Genre:</strong>
+
+                    
+                                            <a href="https://s.to/genre/science-fiction" class="link-light">Science Fiction</a>,                                            <a href="https://s.to/genre/comedy" class="link-light">Comedy</a>,                                            <a href="https://s.to/genre/drama" class="link-light">Drama</a>                    
+                    
+                                    </li>
+                                    */
+        const genresRegex = /<li class="series-group">\s*<strong class="me-1">Genre:<\/strong>([\s\S]*?)<\/li>/;
+        const genresMatch = genresRegex.exec(text);
+        let genres = '';
+        if (genresMatch) {
+            const genreLinksRegex = /<a href="https:\/\/s\.to\/genre\/[^"]+" class="link-light">([^<]+)<\/a>/g;
+            let genreMatch;
+            while ((genreMatch = genreLinksRegex.exec(genresMatch[1])) !== null) {
+                genres += genreMatch[1] + ', ';
+            }
+            genres = genres.replace(/, $/, '');
        }

-        const descriptionMatch = descriptionRegex.exec(text) || [];
-        // sanitize description by removing HTML tags
-        let description = descriptionMatch[1] || '';
-        description = description.replace(/<[^>]+>/g, '').trim();
-        
-
-        const airdateMatch = "Unknown"; // TODO: Implement airdate extraction

        const transformedResults = [{
            description: description || 'No description available',
-            aliases: aliasesArray[0] || 'No aliases available',
+            aliases: genres || '',
            airdate: airdateMatch
        }];

+
        return JSON.stringify(transformedResults);
    } catch (error) {
        sendLog('Details error:' + error);
@@ -77,15 +147,11 @@ async function extractEpisodes(url) {
        console.log("Season Links: " + JSON.stringify(seasonLinks));

        for (const seasonLink of seasonLinks) {
-            const seasonEpisodes = await fetchSeasonEpisodes(`${baseUrl}${seasonLink}`);
+            const seasonEpisodes = await fetchSeasonEpisodes(`${seasonLink}`);
            finishedList.push(...seasonEpisodes);
        }
        console.log("Finished Episode List: " + JSON.stringify(finishedList));

-        // Replace the field "number" with the current index of each item, starting from 1
-        finishedList.forEach((item, index) => {
-            item.number = index + 1;
-        });

        return JSON.stringify(finishedList);

@@ -99,54 +165,19 @@ async function extractEpisodes(url) {

 async function extractStreamUrl(url) {
  try {
-    const baseUrl = 'https://s.to';
+    const language = [2, 4, 3]; // Englisch Dub, Eng-Sub, Ger-Sub
+
    const fetchUrl = `${url}`;
    const response = await soraFetch(fetchUrl);
+    if (!_0xCheck()) return 'https://files.catbox.moe/avolvc.mp4';
    const text = response.text ? await response.text() : response;

-    const finishedList = [];
-    const languageList = getAvailableLanguages(text);
    const videoLinks = getVideoLinks(text);
-    if (!_0xCheck()) return 'https://files.catbox.moe/avolvc.mp4';
-    sendLog("Video Links: " + JSON.stringify(videoLinks));

-    for (const videoLink of videoLinks) {
-      const language = languageList.find(
-        (l) => l.langKey === videoLink.langKey
-      );
-      if (language) {
-        finishedList.push({
-          provider: videoLink.provider,
-          href: `${baseUrl}${videoLink.href}`,
-          language: language.title,
-        });
-      }
-    }

-    // Select the hoster
-    let providerArray = selectHoster(finishedList);
-    let newProviderArray = {};
+    let providerArray = selectHoster(videoLinks, language);

-    for (const [key, value] of Object.entries(providerArray)) {
-      const providerLink = key;
-      const providerName = value;
-      
-      // fetch the provider link and extract the stream URL
-      const streamUrl = await soraFetch(providerLink);
-      console.log("Stream URL: " + streamUrl);
-    const winLocRegex = /window\.location\.href\s*=\s*['"]([^'"]+)['"]/;
-      const winLocMatch = await winLocRegex.exec(streamUrl);
-      let winLocUrl = null;
-      if (!winLocMatch) {
-        winLocUrl = providerLink;
-      } else {
-        winLocUrl = winLocMatch[1];
-      }
-
-      newProviderArray[winLocUrl] = providerName;
-    }
-
-    sendLog("Provider List: " + JSON.stringify(newProviderArray));
+    sendLog("Provider List: " + JSON.stringify(providerArray));

    // Call the multiExtractor function with the new provider array
    // let streams = [];
@@ -159,7 +190,7 @@ async function extractStreamUrl(url) {

    try {
        // Inside extractStreamUrl function
-        let streams = await multiExtractor(newProviderArray);
+        let streams = await multiExtractor(providerArray);
        let returnedStreams = {
            streams: streams,
        };
@@ -194,8 +225,14 @@ async function extractStreamUrl(url) {
 /////////////////////////////////////////////////////////////////////////////////////

 // Helper function to select the hoster
-function selectHoster(finishedList) {
+function selectHoster(finishedList, preferredLang) {
  let provider = {};
+  const languages = {
+      'Deutsch': 1,
+      'Englisch': 2,
+      'Ger-Sub': 3,
+      'Eng-Sub': 4,
+    }
      // providers = {
    //   "https://vidmoly.to/embed-preghvoypr2m.html": "vidmoly",
    //   "https://speedfiles.net/40d98cdccf9c": "speedfiles",
@@ -204,26 +241,18 @@ function selectHoster(finishedList) {

    console.log("Hoster List: " + JSON.stringify(finishedList));

-  // Define the preferred providers and languages
-  const providerList = ["VOE", "SpeedFiles", "Filemoon", "Vidmoly", "DoodStream", "Vidoza", "MP4Upload"];
-  const languageList = ["English", "mit Untertitel Deutsch", "mit Untertitel Englisch"];
-  
-  
-
-  for (const language of languageList) {
-  for (const providerName of providerList) {
-      const video = finishedList.find(
-        (video) => video.provider === providerName && video.language === language
-      );
-      if (video) {
-        provider[video.href] = providerName.toLowerCase();
+    // Prioritize based on preferredLang, if a lang with higher priority is found, skip the rest
+    for (const lang of preferredLang) {
+      for (const video of finishedList) {
+        if (video.language === lang) {
+          provider[video.href] = video.provider;
+        }
+      }
+      if (Object.keys(provider).length > 0) {
+        break; // break outer loop if we have found at least one provider
      }
    }
-    // if the array is not empty, break the loop
-    if (Object.keys(provider).length > 0) {
-      break;
-    }
-  }
+

  sendLog("Provider List: " + JSON.stringify(provider));
  return provider;
@@ -246,11 +275,24 @@ async function sendLog(message) {
 // Site specific structure
 function getSeasonLinks(html) {
    const seasonLinks = [];
-    const seasonRegex = /<div class="hosterSiteDirectNav" id="stream">.*?<ul>(.*?)<\/ul>/s;
+    //             <nav class="mb-2" id="season-nav"> <ul class="nav list-items-nav">
+    const seasonRegex = /<nav class="mb-2" id="season-nav">[\s\S]*?<ul class="nav list-items-nav">([\s\S]*?)<\/ul>/;
+
    const seasonMatch = seasonRegex.exec(html);
    if (seasonMatch) {
        const seasonList = seasonMatch[1];
-        const seasonLinkRegex = /<a[^>]*href="([^"]+)"[^>]*>([^<]+)<\/a>/g;
+        /* <a
+                    href="https://s.to/serie/dexter/staffel-1"
+                    class="alphabet-link nav-link bg-primary "
+                    data-season-pill="1"
+                > 1 </a>
+                              <a
+                    href="https://s.to/serie/dexter/staffel-2"
+                    class="alphabet-link nav-link  "
+                    data-season-pill="2"
+                > 2 </a>
+                */
+        const seasonLinkRegex = /<a\s+[^>]*?href="([^"]+)"[^>]*?class="[^"]*alphabet-link/g;
        let seasonLinkMatch;
        const filmeLinks = [];
        while ((seasonLinkMatch = seasonLinkRegex.exec(seasonList)) !== null) {
@@ -263,6 +305,7 @@ function getSeasonLinks(html) {
        }
        seasonLinks.push(...filmeLinks);
    }
+
    return seasonLinks;
 }

@@ -270,39 +313,52 @@ function getSeasonLinks(html) {
 // Site specific structure
 async function fetchSeasonEpisodes(url) {
    try {
-        const baseUrl = 'https://s.to';
        const fetchUrl = `${url}`;
        const response = await soraFetch(fetchUrl);
-        const text = await response?.text() ?? response;
+        const text = await response?.text() ?? await response;

-        // Updated regex to allow empty <strong> content
-        const regex = /<td class="seasonEpisodeTitle">\s*<a[^>]*href="([^"]+)"[^>]*>.*?<strong>([^<]*)<\/strong>.*?<span>([^<]+)<\/span>.*?<\/a>/g;
-const regex2 =
-  /<td[^>]*seasonEpisodeTitle[^>]*>\s*<a[^>]*href=["']([^"']+)["'][^>]*>[\s\S]*?<strong>\s*([^<]*?)\s*<\/strong>[\s\S]*?(?:<span[^>]*>\s*([^<]*?)\s*<\/span>)?[\s\S]*?<\/a>/gi;
-
-
-        const matches = [];
-        let match;
-        let holderNumber = 0;
-
-        while ((match = regex.exec(text)) !== null) {
-            const [_, link] = match;
-            matches.push({ number: holderNumber, href: `${baseUrl}${link}` });
+        //           <nav class="mb-3" id="episode-nav"> <ul class="nav list-items-nav"></ul>
+        const episodeDivRegex = /<nav class="mb-3" id="episode-nav">[\s\S]*?<ul class="nav list-items-nav">([\s\S]*?)<\/ul>/;
+        const episodeDivMatch = episodeDivRegex.exec(text);
+        const episodeList = [];
+        if (episodeDivMatch) {
+            const episodeListHtml = episodeDivMatch[1];
+            /* <li class="nav-item me-1 mb-2">
+                    <a href="https://s.to/serie/dexter/staffel-1/episode-1"
+                       class=" alphabet-link nav-link  ">
+                        1
+                    </a>
+                </li>
+                            
+                <li class="nav-item me-1 mb-2">
+                    <a href="https://s.to/serie/dexter/staffel-1/episode-2"
+                       class=" alphabet-link nav-link  ">
+                        2
+                    </a>
+                </li>
+                            
+                <li class="nav-item me-1 mb-2">
+                    <a href="https://s.to/serie/dexter/staffel-1/episode-3"
+                       class=" alphabet-link nav-link  ">
+                        3
+                    </a>
+                </li>
+                */
+            const episodeLinkRegex = /<a\s+[^>]*?href="([^"]+)"[^>]*?class="[^"]*alphabet-link[^"]*"[^>]*>\s*([\s\S]*?)\s*<\/a>/g;
+            let episodeLinkMatch;
+            let number = 0;
+            while ((episodeLinkMatch = episodeLinkRegex.exec(episodeListHtml)) !== null) {
+                const [_, episodeLink] = episodeLinkMatch;
+                number += 1;
+                episodeList.push({ number, href: episodeLink });
+            }
+            console.log("Episode List for season " + url + ": " + JSON.stringify(episodeList));
        }
+        return episodeList;

-        // If no matches found with the first regex, try the second one
-        if (matches.length === 0) {
-          console.log("No matches found with first regex, trying second regex.");
-          while ((match = regex2.exec(text)) !== null) {
-            const [_, link] = match;
-            console.log("Match found with second regex: " + link);
-            matches.push({ number: holderNumber, href: `${baseUrl}${link}` });
-          }
-        }
+            

-        return matches;
-
-    } catch (error) {
+            } catch (error) {
        sendLog('FetchSeasonEpisodes helper function error:' + error);
        return [{ number: '0', href: 'https://error.org' }];
    }
@@ -317,32 +373,34 @@ const regex2 =
 // Helper function to get the video links
 // Site specific structure
 function getVideoLinks(html) {
+    const baseUrl = 'https://s.to';
+    
+      // Collect hoster links from each link-box <button>: data-play-url, data-provider-name and data-language-id (the regex expects them in that attribute order). Sample markup:
+    /* <button type="button"
+                                                                    class="link-box btn btn-dark w-100 text-start gap-2"
+                                                                    data-link-id="21778990"
+                                                                    data-play-url="/r?t=eyJpdiI6IjRBTHNuWGVqekFjMVBzK09rREhqelE9PSIsInZhbHVlIjoiMzBnQjZJSW5INHBhWm1qNk9UdGVGYkR0Nyt4Mm1iVlYwRkk5TWFGNEpjaFdDcGJkSW11dTRSeE5RcndqRlJ5SWtXSitHVzFJc0pPSU9rdjBaa3RxUUE9PSIsIm1hYyI6IjM4ZDAxMjgyZWQ5ODYzYWZhODVlODJmMDAyNGMxZDcwODg3NTI0NDJjYWM2YjNlMTM5OGE5YWEzYWMzZjdiNWYiLCJ0YWciOiIifQ%3D%3D"
+                                                                    data-auto-embed="1"
+                                                                    data-provider-name="VOE"
+                                                                    data-language-label="Deutsch"
+                                                                    data-language-id="1"
+                                                            >
+                                                            ...
+                                                            </button>
+    */
    const videoLinks = [];
-    const videoRegex = /<li\s+class="[^"]*"\s+data-lang-key="([^"]+)"[^>]*>.*?<a[^>]*href="([^"]+)"[^>]*>.*?<h4>([^<]+)<\/h4>.*?<\/a>.*?<\/li>/gs;
-    let match;
-
-    while ((match = videoRegex.exec(html)) !== null) {
-        const [_, langKey, href, provider] = match;
-        videoLinks.push({ langKey, href, provider });
+    const videoLinkRegex = /<button\s+type="button"[^>]*?class="link-box btn btn-dark w-100 text-start gap-2"[^>]*?data-play-url="([^"]+)"[^>]*?data-provider-name="([^"]+)"[^>]*?data-language-id="([^"]+)"[^>]*?>/g;
+    let videoLinkMatch;
+    while ((videoLinkMatch = videoLinkRegex.exec(html)) !== null) {
+        const [_, href, provider, language] = videoLinkMatch;
+        videoLinks.push({ href: `${baseUrl}${href}`, provider: provider.toLowerCase(), language: parseInt(language) });
    }

-    return videoLinks;
+    console.log("Video Links: " + JSON.stringify(videoLinks));  
+    
+    return videoLinks; // entries: { href (baseUrl-prefixed), provider (lowercased), language (parseInt of data-language-id) }
 }

-// Helper function to get the available languages
-// Site specific structure
-function getAvailableLanguages(html) {
-    const languages = [];
-    const languageRegex = /<img[^>]*data-lang-key="([^"]+)"[^>]*title="([^"]+)"[^>]*>/g;
-    let match;
-
-    while ((match = languageRegex.exec(html)) !== null) {
-        const [_, langKey, title] = match;
-        languages.push({ langKey, title });
-    }
-
-    return languages;
-}
 function _0xCheck() {
    var _0x1a = typeof _0xB4F2 === 'function';
    var _0x2b = typeof _0x7E9A === 'function';