Commit initial
This commit is contained in:
300
scrape_server.py
Normal file
300
scrape_server.py
Normal file
@@ -0,0 +1,300 @@
|
||||
#!/usr/bin/env python3
"""
Torrent Tracker Scrape Server

Self-hosted replacement for scrape.php / the Cloudflare Worker.
Dependencies: none (Python 3.8+ stdlib only)

Quick start:
    python3 scrape_server.py

With systemd: see torrent-scrape.service

Usage:  GET http://127.0.0.1:8765/?hash=<40_hex_chars>
        GET http://127.0.0.1:8765/?magnet=<magnet_uri>

JSON response:
    {"seeders": n, "leechers": n, "health": "...", "popularity": "...", "sources": n}
"""
|
||||
|
||||
from __future__ import annotations

import json
import re
import time
import urllib.parse
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
|
||||
# ---------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------

HOST = '127.0.0.1'   # Listen on localhost only (nginx proxies external traffic)
PORT = 8765
WORKERS = 10         # Parallel requests towards the trackers
TIMEOUT = 7          # Seconds allowed per tracker
CACHE_TTL = 300      # Cache lifetime in seconds (5 min)

# Public HTTP trackers queried in parallel for each scrape request.
TRACKERS = [
    'http://tracker.opentrackr.org:1337/scrape',
    'http://open.tracker.cl:1337/scrape',
    'http://tracker.openbittorrent.com:80/scrape',
    'http://tracker.torrent.eu.org:451/scrape',
    'http://tracker.tiny-vps.com:6969/scrape',
    'http://tracker.files.fm:6969/scrape',
    'http://tracker1.bt.moack.co.kr:80/scrape',
    'http://tracker.leechersparadise.org:6969/scrape',
    'http://open.stealth.si:80/scrape',
    'http://tracker4.itzmx.com:2710/scrape',
]

# ---------------------------------------------------------------
# In-memory cache { hash_hex: (timestamp, data_dict) }
# Lets us serve the last known data when the trackers stay silent.
# NOTE: plain HTTPServer is single-threaded, so no locking is needed.
# ---------------------------------------------------------------

_cache: dict = {}
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Décodeur bencoding (format réponse tracker)
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def bdecode(data: bytearray, pos: list) -> object:
|
||||
c = data[pos[0]]
|
||||
|
||||
# Entier : i<n>e
|
||||
if c == ord('i'):
|
||||
pos[0] += 1
|
||||
end = data.index(ord('e'), pos[0])
|
||||
n = int(data[pos[0]:end])
|
||||
pos[0] = end + 1
|
||||
return n
|
||||
|
||||
# Liste : l<items>e
|
||||
if c == ord('l'):
|
||||
pos[0] += 1
|
||||
lst = []
|
||||
while data[pos[0]] != ord('e'):
|
||||
lst.append(bdecode(data, pos))
|
||||
pos[0] += 1
|
||||
return lst
|
||||
|
||||
# Dictionnaire : d<key><value>...e
|
||||
if c == ord('d'):
|
||||
pos[0] += 1
|
||||
d = {}
|
||||
while data[pos[0]] != ord('e'):
|
||||
key = bdecode(data, pos)
|
||||
val = bdecode(data, pos)
|
||||
# Clé binaire (ex. info hash 20 octets) → hex string
|
||||
if isinstance(key, (bytes, bytearray)):
|
||||
key = key.hex()
|
||||
d[str(key)] = val
|
||||
pos[0] += 1
|
||||
return d
|
||||
|
||||
# Chaîne : <longueur>:<données>
|
||||
if chr(c).isdigit():
|
||||
colon = data.index(ord(':'), pos[0])
|
||||
length = int(data[pos[0]:colon])
|
||||
pos[0] = colon + 1
|
||||
raw = data[pos[0]:pos[0] + length]
|
||||
pos[0] += length
|
||||
try:
|
||||
# Texte ASCII → str ; données binaires → bytes
|
||||
decoded = raw.decode('ascii')
|
||||
return decoded
|
||||
except (UnicodeDecodeError, ValueError):
|
||||
return bytes(raw)
|
||||
|
||||
return None
|
||||
|
||||
# ---------------------------------------------------------------
# Scraping a single tracker
# ---------------------------------------------------------------

def scrape_tracker(tracker_url: str, hash_hex: str) -> dict | None:
    """Query one HTTP tracker's /scrape endpoint for a single info hash.

    Args:
        tracker_url: base scrape URL, e.g. 'http://host:port/scrape'.
        hash_hex: 40-char lowercase hex info hash (validated by the caller;
                  invalid hex raises ValueError from bytes.fromhex).

    Returns:
        {'seeders': int, 'leechers': int} on success, or None on any
        network, parse or shape failure — results are aggregated across
        many trackers, so per-tracker errors are deliberately swallowed.

    Note: the ``dict | None`` annotation relies on the module-level
    ``from __future__ import annotations`` to stay importable on the
    Python 3.8/3.9 floor stated in the module docstring.
    """
    hash_bytes = bytes.fromhex(hash_hex)
    # info_hash must be the percent-encoded *raw 20 bytes*, not the hex text.
    encoded = urllib.parse.quote(hash_bytes, safe='')
    url = f"{tracker_url}?info_hash={encoded}"

    try:
        req = urllib.request.Request(
            url,
            headers={'User-Agent': 'TorrentIndicator/1.0'}
        )
        with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
            raw = resp.read()
    except Exception:
        # Best effort: an unreachable or slow tracker is simply skipped.
        return None

    try:
        parsed = bdecode(bytearray(raw), [0])
    except Exception:
        return None

    # Expected shape: {'files': {<20-byte hash>: {'complete': n, 'incomplete': n}}}
    if not isinstance(parsed, dict) or 'files' not in parsed:
        return None

    files = parsed['files']
    if not isinstance(files, dict):
        return None

    # We scraped a single hash, so take the first (only) entry.
    for file_data in files.values():
        if isinstance(file_data, dict):
            return {
                'seeders': int(file_data.get('complete', 0) or 0),
                'leechers': int(file_data.get('incomplete', 0) or 0),
            }

    return None
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Parsing du magnet link
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def extract_hash(magnet: str) -> str:
|
||||
# Hex 40 chars
|
||||
m = re.search(r'xt=urn:btih:([0-9a-fA-F]{40})', magnet, re.I)
|
||||
if m:
|
||||
return m.group(1).lower()
|
||||
|
||||
# Base32 32 chars
|
||||
m = re.search(r'xt=urn:btih:([A-Z2-7]{32})', magnet, re.I)
|
||||
if m:
|
||||
return _base32_to_hex(m.group(1).upper())
|
||||
|
||||
return ''
|
||||
|
||||
def _base32_to_hex(s: str) -> str:
|
||||
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
|
||||
buf, bits, out = 0, 0, []
|
||||
for c in s:
|
||||
val = alphabet.find(c)
|
||||
if val < 0:
|
||||
continue
|
||||
buf = (buf << 5) | val
|
||||
bits += 5
|
||||
if bits >= 8:
|
||||
bits -= 8
|
||||
out.append((buf >> bits) & 0xFF)
|
||||
return bytes(out).hex()
|
||||
|
||||
# ---------------------------------------------------------------
# Health / popularity scoring
# ---------------------------------------------------------------

def compute_health(seeders: int, leechers: int) -> str:
    """Rate swarm health from the seeder share of all known peers."""
    if not seeders:
        return 'dead'
    ratio = seeders / max(1, seeders + leechers)
    for threshold, label in ((0.5, 'excellent'), (0.2, 'good')):
        if ratio >= threshold:
            return label
    return 'poor'
|
||||
|
||||
def compute_popularity(total: int) -> str:
    """Bucket the total peer count (seeders + leechers) into a label."""
    for floor, label in ((1000, 'viral'), (100, 'popular'), (10, 'moderate')):
        if total >= floor:
            return label
    return 'low'
|
||||
|
||||
# ---------------------------------------------------------------
# HTTP server
# ---------------------------------------------------------------

class ScrapeHandler(BaseHTTPRequestHandler):
    """Request handler serving aggregated scrape results as JSON.

    Accepts GET /?hash=<40 hex> or GET /?magnet=<magnet_uri>, fans out
    to all TRACKERS in parallel, and returns the best (maximum) seeder
    and leecher counts found, with derived health/popularity labels.
    """

    def log_message(self, *args):
        """Silence per-request logging (handled via the systemd journal)."""
        pass

    def do_OPTIONS(self):
        """Answer CORS preflight requests with an empty 204."""
        self.send_response(204)
        self._add_cors()
        self.end_headers()

    def do_GET(self):
        """Resolve the info hash, scrape all trackers, reply with JSON."""
        parsed = urllib.parse.urlparse(self.path)
        params = urllib.parse.parse_qs(parsed.query)

        hash_hex = params.get('hash', [''])[0].strip().lower()
        magnet = params.get('magnet', [''])[0].strip()

        # Fall back to extracting the hash from a magnet URI.
        if not hash_hex and magnet:
            hash_hex = extract_hash(urllib.parse.unquote(magnet))

        # Reject anything that is not exactly 40 lowercase hex chars.
        if not re.fullmatch(r'[0-9a-f]{40}', hash_hex):
            self._send_json({'error': 'Hash invalide. Fournissez ?hash= (40 hex) ou ?magnet=.'}, 400)
            return

        # Check the cache before hitting the trackers.
        cached = _cache.get(hash_hex)
        if cached and (time.time() - cached[0]) < CACHE_TTL:
            self._send_json(cached[1])
            return

        best_seeders = 0
        best_leechers = 0
        sources = 0

        # Scrape all trackers concurrently; keep the maximum counts seen
        # and count how many trackers actually answered.
        with ThreadPoolExecutor(max_workers=WORKERS) as executor:
            futures = {
                executor.submit(scrape_tracker, tracker, hash_hex): tracker
                for tracker in TRACKERS
            }
            for future in as_completed(futures):
                result = future.result()
                if result:
                    if result['seeders'] > best_seeders: best_seeders = result['seeders']
                    if result['leechers'] > best_leechers: best_leechers = result['leechers']
                    sources += 1

        if sources == 0 and cached:
            # No tracker answered: serve the cache even though it has
            # expired, rather than surfacing an error to the client.
            stale = dict(cached[1])
            stale['stale'] = True
            self._send_json(stale)
            return

        data = {
            'seeders': best_seeders,
            'leechers': best_leechers,
            'health': compute_health(best_seeders, best_leechers),
            'popularity': compute_popularity(best_seeders + best_leechers),
            'sources': sources,
        }

        # Cache only when at least one tracker responded.
        if sources > 0:
            _cache[hash_hex] = (time.time(), data)

        self._send_json(data)

    def _send_json(self, data: dict, status: int = 200):
        """Serialize *data* and send it with JSON + CORS headers."""
        body = json.dumps(data).encode('utf-8')
        self.send_response(status)
        self.send_header('Content-Type', 'application/json; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        # Freshness is handled by the server-side cache, not the browser.
        self.send_header('Cache-Control', 'no-store')
        self._add_cors()
        self.end_headers()
        self.wfile.write(body)

    def _add_cors(self):
        """Emit permissive CORS headers (any origin, GET/OPTIONS only)."""
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
||||
|
||||
# ---------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------

if __name__ == '__main__':
    # Single-threaded HTTP server bound to localhost; blocks until Ctrl+C.
    httpd = HTTPServer((HOST, PORT), ScrapeHandler)
    print(f"Torrent scrape server → http://{HOST}:{PORT}")
    print("Arrêt : Ctrl+C")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nServeur arrêté.")
|
||||
Reference in New Issue
Block a user