Commit initial
This commit is contained in:
300
scrape_server.py
Normal file
300
scrape_server.py
Normal file
@@ -0,0 +1,300 @@
|
||||
#!/usr/bin/env python3
"""
Torrent Tracker Scrape Server

Self-hosted replacement for scrape.php / the Cloudflare Worker.
Dependencies: none (Python 3.8+ stdlib only)

Quick start:
    python3 scrape_server.py

With systemd: see torrent-scrape.service

Usage:  GET http://127.0.0.1:8765/?hash=<40_hex_chars>
        GET http://127.0.0.1:8765/?magnet=<magnet_uri>

JSON response:
    {"seeders": n, "leechers": n, "health": "...", "popularity": "...", "sources": n}
"""
|
||||
|
||||
from __future__ import annotations

import json
import re
import time
import urllib.parse
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
|
||||
# ---------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------

HOST = '127.0.0.1'   # Listen on localhost only (nginx proxies external traffic)
PORT = 8765
WORKERS = 10         # Parallel requests towards the trackers
TIMEOUT = 7          # Seconds allowed per tracker
CACHE_TTL = 300      # Cache lifetime in seconds (5 min)

# Public HTTP trackers queried in parallel for each scrape request.
TRACKERS = [
    'http://tracker.opentrackr.org:1337/scrape',
    'http://open.tracker.cl:1337/scrape',
    'http://tracker.openbittorrent.com:80/scrape',
    'http://tracker.torrent.eu.org:451/scrape',
    'http://tracker.tiny-vps.com:6969/scrape',
    'http://tracker.files.fm:6969/scrape',
    'http://tracker1.bt.moack.co.kr:80/scrape',
    'http://tracker.leechersparadise.org:6969/scrape',
    'http://open.stealth.si:80/scrape',
    'http://tracker4.itzmx.com:2710/scrape',
]

# ---------------------------------------------------------------
# In-memory cache { hash_hex: (timestamp, data_dict) }
# Lets us serve the last known data when the trackers stay silent.
# NOTE: plain HTTPServer is single-threaded, so no locking is needed.
# ---------------------------------------------------------------

_cache: dict = {}
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Décodeur bencoding (format réponse tracker)
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def bdecode(data: bytearray, pos: list) -> object:
|
||||
c = data[pos[0]]
|
||||
|
||||
# Entier : i<n>e
|
||||
if c == ord('i'):
|
||||
pos[0] += 1
|
||||
end = data.index(ord('e'), pos[0])
|
||||
n = int(data[pos[0]:end])
|
||||
pos[0] = end + 1
|
||||
return n
|
||||
|
||||
# Liste : l<items>e
|
||||
if c == ord('l'):
|
||||
pos[0] += 1
|
||||
lst = []
|
||||
while data[pos[0]] != ord('e'):
|
||||
lst.append(bdecode(data, pos))
|
||||
pos[0] += 1
|
||||
return lst
|
||||
|
||||
# Dictionnaire : d<key><value>...e
|
||||
if c == ord('d'):
|
||||
pos[0] += 1
|
||||
d = {}
|
||||
while data[pos[0]] != ord('e'):
|
||||
key = bdecode(data, pos)
|
||||
val = bdecode(data, pos)
|
||||
# Clé binaire (ex. info hash 20 octets) → hex string
|
||||
if isinstance(key, (bytes, bytearray)):
|
||||
key = key.hex()
|
||||
d[str(key)] = val
|
||||
pos[0] += 1
|
||||
return d
|
||||
|
||||
# Chaîne : <longueur>:<données>
|
||||
if chr(c).isdigit():
|
||||
colon = data.index(ord(':'), pos[0])
|
||||
length = int(data[pos[0]:colon])
|
||||
pos[0] = colon + 1
|
||||
raw = data[pos[0]:pos[0] + length]
|
||||
pos[0] += length
|
||||
try:
|
||||
# Texte ASCII → str ; données binaires → bytes
|
||||
decoded = raw.decode('ascii')
|
||||
return decoded
|
||||
except (UnicodeDecodeError, ValueError):
|
||||
return bytes(raw)
|
||||
|
||||
return None
|
||||
|
||||
# ---------------------------------------------------------------
# Scraping a single tracker
# ---------------------------------------------------------------

def scrape_tracker(tracker_url: str, hash_hex: str) -> dict | None:
    """Query one HTTP tracker's /scrape endpoint for a single info hash.

    Args:
        tracker_url: base scrape URL, e.g. 'http://host:port/scrape'.
        hash_hex: 40-char lowercase hex info hash (validated by the caller;
                  invalid hex raises ValueError from bytes.fromhex).

    Returns:
        {'seeders': int, 'leechers': int} on success, or None on any
        network, parse or shape failure — results are aggregated across
        many trackers, so per-tracker errors are deliberately swallowed.

    Note: the ``dict | None`` annotation relies on the module-level
    ``from __future__ import annotations`` to stay importable on the
    Python 3.8/3.9 floor stated in the module docstring.
    """
    hash_bytes = bytes.fromhex(hash_hex)
    # info_hash must be the percent-encoded *raw 20 bytes*, not the hex text.
    encoded = urllib.parse.quote(hash_bytes, safe='')
    url = f"{tracker_url}?info_hash={encoded}"

    try:
        req = urllib.request.Request(
            url,
            headers={'User-Agent': 'TorrentIndicator/1.0'}
        )
        with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
            raw = resp.read()
    except Exception:
        # Best effort: an unreachable or slow tracker is simply skipped.
        return None

    try:
        parsed = bdecode(bytearray(raw), [0])
    except Exception:
        return None

    # Expected shape: {'files': {<20-byte hash>: {'complete': n, 'incomplete': n}}}
    if not isinstance(parsed, dict) or 'files' not in parsed:
        return None

    files = parsed['files']
    if not isinstance(files, dict):
        return None

    # We scraped a single hash, so take the first (only) entry.
    for file_data in files.values():
        if isinstance(file_data, dict):
            return {
                'seeders': int(file_data.get('complete', 0) or 0),
                'leechers': int(file_data.get('incomplete', 0) or 0),
            }

    return None
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Parsing du magnet link
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def extract_hash(magnet: str) -> str:
|
||||
# Hex 40 chars
|
||||
m = re.search(r'xt=urn:btih:([0-9a-fA-F]{40})', magnet, re.I)
|
||||
if m:
|
||||
return m.group(1).lower()
|
||||
|
||||
# Base32 32 chars
|
||||
m = re.search(r'xt=urn:btih:([A-Z2-7]{32})', magnet, re.I)
|
||||
if m:
|
||||
return _base32_to_hex(m.group(1).upper())
|
||||
|
||||
return ''
|
||||
|
||||
def _base32_to_hex(s: str) -> str:
|
||||
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
|
||||
buf, bits, out = 0, 0, []
|
||||
for c in s:
|
||||
val = alphabet.find(c)
|
||||
if val < 0:
|
||||
continue
|
||||
buf = (buf << 5) | val
|
||||
bits += 5
|
||||
if bits >= 8:
|
||||
bits -= 8
|
||||
out.append((buf >> bits) & 0xFF)
|
||||
return bytes(out).hex()
|
||||
|
||||
# ---------------------------------------------------------------
# Health / popularity scoring
# ---------------------------------------------------------------

def compute_health(seeders: int, leechers: int) -> str:
    """Rate swarm health from the seeder share of all known peers."""
    if not seeders:
        return 'dead'
    ratio = seeders / max(1, seeders + leechers)
    for threshold, label in ((0.5, 'excellent'), (0.2, 'good')):
        if ratio >= threshold:
            return label
    return 'poor'
|
||||
|
||||
def compute_popularity(total: int) -> str:
    """Bucket the total peer count (seeders + leechers) into a label."""
    for floor, label in ((1000, 'viral'), (100, 'popular'), (10, 'moderate')):
        if total >= floor:
            return label
    return 'low'
|
||||
|
||||
# ---------------------------------------------------------------
# HTTP server
# ---------------------------------------------------------------

class ScrapeHandler(BaseHTTPRequestHandler):
    """Request handler serving aggregated scrape results as JSON.

    Accepts GET /?hash=<40 hex> or GET /?magnet=<magnet_uri>, fans out
    to all TRACKERS in parallel, and returns the best (maximum) seeder
    and leecher counts found, with derived health/popularity labels.
    """

    def log_message(self, *args):
        """Silence per-request logging (handled via the systemd journal)."""
        pass

    def do_OPTIONS(self):
        """Answer CORS preflight requests with an empty 204."""
        self.send_response(204)
        self._add_cors()
        self.end_headers()

    def do_GET(self):
        """Resolve the info hash, scrape all trackers, reply with JSON."""
        parsed = urllib.parse.urlparse(self.path)
        params = urllib.parse.parse_qs(parsed.query)

        hash_hex = params.get('hash', [''])[0].strip().lower()
        magnet = params.get('magnet', [''])[0].strip()

        # Fall back to extracting the hash from a magnet URI.
        if not hash_hex and magnet:
            hash_hex = extract_hash(urllib.parse.unquote(magnet))

        # Reject anything that is not exactly 40 lowercase hex chars.
        if not re.fullmatch(r'[0-9a-f]{40}', hash_hex):
            self._send_json({'error': 'Hash invalide. Fournissez ?hash= (40 hex) ou ?magnet=.'}, 400)
            return

        # Check the cache before hitting the trackers.
        cached = _cache.get(hash_hex)
        if cached and (time.time() - cached[0]) < CACHE_TTL:
            self._send_json(cached[1])
            return

        best_seeders = 0
        best_leechers = 0
        sources = 0

        # Scrape all trackers concurrently; keep the maximum counts seen
        # and count how many trackers actually answered.
        with ThreadPoolExecutor(max_workers=WORKERS) as executor:
            futures = {
                executor.submit(scrape_tracker, tracker, hash_hex): tracker
                for tracker in TRACKERS
            }
            for future in as_completed(futures):
                result = future.result()
                if result:
                    if result['seeders'] > best_seeders: best_seeders = result['seeders']
                    if result['leechers'] > best_leechers: best_leechers = result['leechers']
                    sources += 1

        if sources == 0 and cached:
            # No tracker answered: serve the cache even though it has
            # expired, rather than surfacing an error to the client.
            stale = dict(cached[1])
            stale['stale'] = True
            self._send_json(stale)
            return

        data = {
            'seeders': best_seeders,
            'leechers': best_leechers,
            'health': compute_health(best_seeders, best_leechers),
            'popularity': compute_popularity(best_seeders + best_leechers),
            'sources': sources,
        }

        # Cache only when at least one tracker responded.
        if sources > 0:
            _cache[hash_hex] = (time.time(), data)

        self._send_json(data)

    def _send_json(self, data: dict, status: int = 200):
        """Serialize *data* and send it with JSON + CORS headers."""
        body = json.dumps(data).encode('utf-8')
        self.send_response(status)
        self.send_header('Content-Type', 'application/json; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        # Freshness is handled by the server-side cache, not the browser.
        self.send_header('Cache-Control', 'no-store')
        self._add_cors()
        self.end_headers()
        self.wfile.write(body)

    def _add_cors(self):
        """Emit permissive CORS headers (any origin, GET/OPTIONS only)."""
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
||||
|
||||
# ---------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------

if __name__ == '__main__':
    # Single-threaded HTTP server bound to localhost; blocks until Ctrl+C.
    httpd = HTTPServer((HOST, PORT), ScrapeHandler)
    print(f"Torrent scrape server → http://{HOST}:{PORT}")
    print("Arrêt : Ctrl+C")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nServeur arrêté.")
|
||||
Reference in New Issue
Block a user