Commit initial

This commit is contained in:
2026-03-08 01:33:56 +01:00
commit c5f42cf958
8 changed files with 1230 additions and 0 deletions

300
scrape_server.py Normal file
View File

@@ -0,0 +1,300 @@
#!/usr/bin/env python3
"""
Torrent Tracker Scrape Server
Remplacement auto-hébergé de scrape.php / du Cloudflare Worker.
Dépendances : aucune (stdlib Python 3.8+)
Démarrage rapide :
python3 scrape_server.py
Avec systemd : voir torrent-scrape.service
Usage : GET http://127.0.0.1:8765/?hash=<40_hex_chars>
GET http://127.0.0.1:8765/?magnet=<magnet_uri>
Réponse JSON :
{"seeders": n, "leechers": n, "health": "...", "popularity": "...", "sources": n}
"""
import json
import re
import time
import urllib.request
import urllib.parse
from http.server import HTTPServer, BaseHTTPRequestHandler
from concurrent.futures import ThreadPoolExecutor, as_completed
# ---------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------
HOST = '127.0.0.1'  # Listen on loopback only (nginx does the proxying)
PORT = 8765
WORKERS = 10  # Parallel requests to the trackers
TIMEOUT = 7  # Seconds per tracker
CACHE_TTL = 300  # Cache lifetime in seconds (5 min)
# HTTP scrape endpoints queried for every lookup.
TRACKERS = [
    'http://tracker.opentrackr.org:1337/scrape',
    'http://open.tracker.cl:1337/scrape',
    'http://tracker.openbittorrent.com:80/scrape',
    'http://tracker.torrent.eu.org:451/scrape',
    'http://tracker.tiny-vps.com:6969/scrape',
    'http://tracker.files.fm:6969/scrape',
    'http://tracker1.bt.moack.co.kr:80/scrape',
    'http://tracker.leechersparadise.org:6969/scrape',
    'http://open.stealth.si:80/scrape',
    'http://tracker4.itzmx.com:2710/scrape',
]
# ---------------------------------------------------------------
# In-memory cache { hash_hex: (timestamp, data_dict) }
# Lets us return the last known data when the trackers are silent.
# ---------------------------------------------------------------
_cache: dict = {}
# ---------------------------------------------------------------
# Bencoding decoder (tracker response format)
# ---------------------------------------------------------------
def bdecode(data: bytearray, pos: list) -> object:
    """Decode one bencoded value from *data* starting at ``pos[0]``.

    *pos* is a single-element list used as a mutable cursor; on return
    ``pos[0]`` points just past the decoded value.  Returns int, list,
    dict, str (for ASCII-decodable strings) or bytes (binary strings);
    None when the leading byte matches no bencode type.
    """
    marker = data[pos[0]]
    # Integer: i<digits>e
    if marker == ord('i'):
        pos[0] += 1
        end = data.index(ord('e'), pos[0])
        value = int(data[pos[0]:end])
        pos[0] = end + 1
        return value
    # List: l<items>e
    if marker == ord('l'):
        pos[0] += 1
        items = []
        while data[pos[0]] != ord('e'):
            items.append(bdecode(data, pos))
        pos[0] += 1
        return items
    # Dictionary: d<key><value>...e
    if marker == ord('d'):
        pos[0] += 1
        result = {}
        while data[pos[0]] != ord('e'):
            key = bdecode(data, pos)
            value = bdecode(data, pos)
            # Binary keys (e.g. 20-byte info hashes) become hex strings
            if isinstance(key, (bytes, bytearray)):
                key = key.hex()
            result[str(key)] = value
        pos[0] += 1
        return result
    # String: <length>:<payload>
    if chr(marker).isdigit():
        sep = data.index(ord(':'), pos[0])
        length = int(data[pos[0]:sep])
        pos[0] = sep + 1
        payload = data[pos[0]:pos[0] + length]
        pos[0] += length
        try:
            # ASCII text -> str; anything else -> bytes
            return payload.decode('ascii')
        except (UnicodeDecodeError, ValueError):
            return bytes(payload)
    return None
# ---------------------------------------------------------------
# Scrape a single tracker
# ---------------------------------------------------------------
def scrape_tracker(tracker_url: str, hash_hex: str) -> "dict | None":
    """Query one HTTP tracker's /scrape endpoint for a single info hash.

    Args:
        tracker_url: base scrape URL (e.g. 'http://host:port/scrape').
        hash_hex: 40-char lowercase hex info hash (validated by the caller).

    Returns:
        {'seeders': int, 'leechers': int} on success, None on any
        network or parse failure (best-effort: failures are silent).

    Note: the return annotation is quoted — an unquoted ``dict | None``
    is evaluated at definition time and raises TypeError on Python
    3.8/3.9, which the module docstring claims to support.
    """
    hash_bytes = bytes.fromhex(hash_hex)
    # The raw 20-byte hash must be percent-encoded byte-for-byte.
    encoded = urllib.parse.quote(hash_bytes, safe='')
    url = f"{tracker_url}?info_hash={encoded}"
    try:
        req = urllib.request.Request(
            url,
            headers={'User-Agent': 'TorrentIndicator/1.0'}
        )
        with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
            raw = resp.read()
    except Exception:
        # Tracker unreachable / timed out — treat as "no data".
        return None
    try:
        parsed = bdecode(bytearray(raw), [0])
    except Exception:
        return None
    if not isinstance(parsed, dict) or 'files' not in parsed:
        return None
    files = parsed['files']
    if not isinstance(files, dict):
        return None
    # The response maps info_hash -> stats; we asked about exactly one
    # hash, so the first dict-valued entry is the one we want.
    for file_data in files.values():
        if isinstance(file_data, dict):
            return {
                'seeders': int(file_data.get('complete', 0) or 0),
                'leechers': int(file_data.get('incomplete', 0) or 0),
            }
    return None
# ---------------------------------------------------------------
# Parsing du magnet link
# ---------------------------------------------------------------
def extract_hash(magnet: str) -> str:
# Hex 40 chars
m = re.search(r'xt=urn:btih:([0-9a-fA-F]{40})', magnet, re.I)
if m:
return m.group(1).lower()
# Base32 32 chars
m = re.search(r'xt=urn:btih:([A-Z2-7]{32})', magnet, re.I)
if m:
return _base32_to_hex(m.group(1).upper())
return ''
def _base32_to_hex(s: str) -> str:
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
buf, bits, out = 0, 0, []
for c in s:
val = alphabet.find(c)
if val < 0:
continue
buf = (buf << 5) | val
bits += 5
if bits >= 8:
bits -= 8
out.append((buf >> bits) & 0xFF)
return bytes(out).hex()
# ---------------------------------------------------------------
# Health / popularity heuristics
# ---------------------------------------------------------------
def compute_health(seeders: int, leechers: int) -> str:
    """Classify swarm health from the share of seeders among all peers."""
    if not seeders:
        return 'dead'
    seeder_share = seeders / max(1, seeders + leechers)
    for threshold, label in ((0.5, 'excellent'), (0.2, 'good')):
        if seeder_share >= threshold:
            return label
    return 'poor'
def compute_popularity(total: int) -> str:
    """Bucket the total peer count into a coarse popularity label."""
    for floor, label in ((1000, 'viral'), (100, 'popular'), (10, 'moderate')):
        if total >= floor:
            return label
    return 'low'
# ---------------------------------------------------------------
# HTTP server
# ---------------------------------------------------------------
class ScrapeHandler(BaseHTTPRequestHandler):
    """Serves aggregated tracker scrape results as JSON over HTTP."""

    def log_message(self, *args):
        # Suppress the default stderr request log (systemd journal is used instead)
        pass

    def do_OPTIONS(self):
        # CORS preflight: empty 204 response carrying only the CORS headers
        self.send_response(204)
        self._add_cors()
        self.end_headers()

    def do_GET(self):
        query = urllib.parse.parse_qs(urllib.parse.urlparse(self.path).query)
        hash_hex = query.get('hash', [''])[0].strip().lower()
        magnet_uri = query.get('magnet', [''])[0].strip()
        if magnet_uri and not hash_hex:
            hash_hex = extract_hash(urllib.parse.unquote(magnet_uri))
        if not re.fullmatch(r'[0-9a-f]{40}', hash_hex):
            self._send_json({'error': 'Hash invalide. Fournissez ?hash= (40 hex) ou ?magnet=.'}, 400)
            return

        # Fresh cache hit: answer without touching the trackers.
        entry = _cache.get(hash_hex)
        if entry is not None and (time.time() - entry[0]) < CACHE_TTL:
            self._send_json(entry[1])
            return

        seeders = 0
        leechers = 0
        responders = 0
        with ThreadPoolExecutor(max_workers=WORKERS) as pool:
            pending = [pool.submit(scrape_tracker, t, hash_hex) for t in TRACKERS]
            for done in as_completed(pending):
                stats = done.result()
                if not stats:
                    continue
                # Keep the highest counts reported by any tracker.
                seeders = max(seeders, stats['seeders'])
                leechers = max(leechers, stats['leechers'])
                responders += 1

        if responders == 0 and entry:
            # Every tracker was silent: prefer stale cached data over a
            # user-visible error.
            payload = dict(entry[1])
            payload['stale'] = True
            self._send_json(payload)
            return

        payload = {
            'seeders': seeders,
            'leechers': leechers,
            'health': compute_health(seeders, leechers),
            'popularity': compute_popularity(seeders + leechers),
            'sources': responders,
        }
        # Cache only answers backed by at least one responding tracker.
        if responders > 0:
            _cache[hash_hex] = (time.time(), payload)
        self._send_json(payload)

    def _send_json(self, data: dict, status: int = 200):
        # Serialize *data* and emit a complete JSON response with CORS headers.
        body = json.dumps(data).encode('utf-8')
        self.send_response(status)
        self.send_header('Content-Type', 'application/json; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.send_header('Cache-Control', 'no-store')
        self._add_cors()
        self.end_headers()
        self.wfile.write(body)

    def _add_cors(self):
        # Allow any origin: the endpoint serves public, read-only data.
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
# ---------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------
if __name__ == '__main__':
    # Single-threaded blocking server: each request fans out to the
    # trackers through its own thread pool, so one worker suffices here.
    server = HTTPServer((HOST, PORT), ScrapeHandler)
    print(f"Torrent scrape server → http://{HOST}:{PORT}")
    print("Arrêt : Ctrl+C")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl+C: exit cleanly instead of dumping a traceback.
        print("\nServeur arrêté.")