diff --git a/README.md b/README.md index 6a26a3d..fe6bd3e 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,6 @@ # Kawai -<<<<<<< HEAD Local AI image and video generator. Simple UI. NSFW-capable. Auto GPU detection (Nvidia / AMD / Intel / Apple Silicon / CPU). -======= -<<<<<<< HEAD -Local AI image and video generator. Simple UI. NSFW-capable. Auto GPU detection (Nvidia / AMD / Intel / Apple Silicon / CPU). -======= -Local AI image and video generator. Simple UI. NSFW-capable. Auto GPU detection (Nvidia / AMD / Intel / CPU). ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 ## Quick start @@ -18,10 +10,6 @@ python launcher.py Run with **any Python** you have installed. The launcher bootstraps `uv` and uses it to fetch a clean Python 3.11 runtime + venv, then installs the right PyTorch build for your GPU. Nothing about your system Python is touched. -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 Works on Windows, Linux, and macOS. First run takes a few minutes (uv install + Python 3.11 download + torch + dependencies). Subsequent runs start instantly. @@ -41,26 +29,11 @@ python launcher.py --reinstall # wipe install marker, re-detect, reinstal `--vendor {nvidia,amd,intel,cpu}` is available too if you need to pair (e.g. `--backend directml --vendor intel`). Override is persisted in `config.local.json` and survives relaunches until you pass `--backend` again or `--reinstall`. -<<<<<<< HEAD -======= -======= -First run takes a few minutes (uv install + Python 3.11 download + torch + dependencies). Subsequent runs start instantly. - ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 ### What the launcher does 1. Installs `uv` to `.tools/` if not present. 2. Creates `venv/` with Python 3.11 (uv downloads the interpreter on demand). -<<<<<<< HEAD 3. Detects GPU (Nvidia / AMD / Intel / Apple Silicon / CPU) and installs matching PyTorch wheel. -======= -<<<<<<< HEAD -3. Detects GPU (Nvidia / AMD / Intel / Apple Silicon / CPU) and installs matching PyTorch wheel. -======= -3. Detects GPU (Nvidia / AMD / Intel / CPU) and installs matching PyTorch wheel. ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 4. Installs latest `diffusers`, `transformers`, etc. 5. Opens browser UI at `http://127.0.0.1:7860`. @@ -85,12 +58,4 @@ CSAM detection on all outputs (NudeNet age classifier + hash check). All other c ## Status -<<<<<<< HEAD Windows + Linux + macOS. AMD on Linux uses ROCm; AMD/Intel on Windows use DirectML; Apple Silicon uses MPS. Intel Macs run on CPU only (no GPU acceleration path). -======= -<<<<<<< HEAD -Windows + Linux + macOS. AMD on Linux uses ROCm; AMD/Intel on Windows use DirectML; Apple Silicon uses MPS. Intel Macs run on CPU only (no GPU acceleration path). -======= -Windows only. Linux support planned. ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 diff --git a/backends/device.py b/backends/device.py index a02fd40..0f7ef69 100644 --- a/backends/device.py +++ b/backends/device.py @@ -28,17 +28,8 @@ def hardware_info() -> dict: def get_device(): import torch backend = hardware_info()["backend"] -<<<<<<< HEAD # ROCm builds of torch expose the cuda namespace. if backend in ("cuda", "rocm") and torch.cuda.is_available(): -======= -<<<<<<< HEAD - # ROCm builds of torch expose the cuda namespace. 
- if backend in ("cuda", "rocm") and torch.cuda.is_available(): -======= - if backend == "cuda" and torch.cuda.is_available(): ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 return torch.device("cuda") if backend == "directml": try: @@ -46,16 +37,8 @@ def get_device(): return torch_directml.device() except ImportError: pass -<<<<<<< HEAD if backend == "mps" and getattr(torch.backends, "mps", None) and torch.backends.mps.is_available(): return torch.device("mps") -======= -<<<<<<< HEAD - if backend == "mps" and getattr(torch.backends, "mps", None) and torch.backends.mps.is_available(): - return torch.device("mps") -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 return torch.device("cpu") @@ -64,12 +47,5 @@ def torch_dtype(): backend = hardware_info()["backend"] if backend == "cpu": return torch.float32 -<<<<<<< HEAD # MPS supports fp16 for diffusers; bf16 has gaps. Stick with fp16. -======= -<<<<<<< HEAD - # MPS supports fp16 for diffusers; bf16 has gaps. Stick with fp16. -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 return torch.float16 diff --git a/backends/hardware.py b/backends/hardware.py index e7345ef..15b04e2 100644 --- a/backends/hardware.py +++ b/backends/hardware.py @@ -1,32 +1,14 @@ """GPU and VRAM detection. Returns vendor + tier used to pick torch wheel and default models.""" from __future__ import annotations -<<<<<<< HEAD -======= -<<<<<<< HEAD -======= -import ctypes ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 import platform import subprocess from dataclasses import dataclass from typing import Literal -<<<<<<< HEAD Vendor = Literal["nvidia", "amd", "intel", "apple", "cpu"] Backend = Literal["cuda", "rocm", "directml", "mps", "cpu"] SUPPORTED_BACKENDS: tuple[str, ...] = ("auto", "cuda", "rocm", "directml", "mps", "cpu") -======= -<<<<<<< HEAD -Vendor = Literal["nvidia", "amd", "intel", "apple", "cpu"] -Backend = Literal["cuda", "rocm", "directml", "mps", "cpu"] -SUPPORTED_BACKENDS: tuple[str, ...] = ("auto", "cuda", "rocm", "directml", "mps", "cpu") -======= -Vendor = Literal["nvidia", "amd", "intel", "cpu"] -Backend = Literal["cuda", "directml", "cpu"] ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 @dataclass @@ -38,10 +20,6 @@ class HardwareInfo: tier: Literal["cpu", "low", "mid", "high", "ultra"] -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 def _detect_mac_gpu() -> tuple[str, float] | None: """Apple Silicon: report chip name + unified memory (proxy for VRAM). Intel Mac: returns None (no GPU acceleration path).""" @@ -66,11 +44,6 @@ def _detect_mac_gpu() -> tuple[str, float] | None: return name, vram_gb -<<<<<<< HEAD -======= -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 def _run(cmd: list[str]) -> str: try: out = subprocess.run(cmd, capture_output=True, text=True, timeout=10, check=False) @@ -127,10 +100,6 @@ def _detect_dxgi() -> list[tuple[str, float, str]]: return results -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 def _detect_linux_gpus() -> list[tuple[str, float, str]]: """Enumerate Linux GPUs. 
Returns list of (name, vram_gb, vendor_hint).""" if platform.system() != "Linux": @@ -191,11 +160,6 @@ def _vendor_from_backend(backend: str) -> Vendor: }.get(backend, "cpu") # type: ignore[return-value] -<<<<<<< HEAD -======= -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 def _vram_tier(vram_gb: float) -> Literal["cpu", "low", "mid", "high", "ultra"]: if vram_gb < 1: return "cpu" @@ -208,10 +172,6 @@ def _vram_tier(vram_gb: float) -> Literal["cpu", "low", "mid", "high", "ultra"]: return "ultra" -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 def detect(force_backend: str | None = None, force_vendor: str | None = None) -> HardwareInfo: """Auto-detect hardware. If force_backend is set (cuda/rocm/directml/cpu), skip detection for that decision but still try to discover device name + VRAM for tier sizing.""" @@ -267,21 +227,10 @@ def detect(force_backend: str | None = None, force_vendor: str | None = None) -> name, vram = mac_gpu return HardwareInfo("apple", "mps", name, vram, _vram_tier(vram)) -<<<<<<< HEAD -======= -======= -def detect() -> HardwareInfo: - nv = _detect_nvidia() ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 if nv: name, vram = nv return HardwareInfo("nvidia", "cuda", name, vram, _vram_tier(vram)) -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 if is_windows and win_adapters: win_adapters.sort(key=lambda a: a[1], reverse=True) name, vram, hint = win_adapters[0] @@ -305,26 +254,6 @@ def detect() -> HardwareInfo: # No good Intel-on-Linux torch path here; default to CPU. return HardwareInfo("cpu", "cpu", f"Intel GPU (no backend) — {name}", 0.0, "cpu") -<<<<<<< HEAD -======= -======= - adapters = _detect_dxgi() - # Prefer discrete (highest VRAM) non-basic adapter - adapters = [a for a in adapters if "basic" not in a[0].lower() and "microsoft" not in a[0].lower()] - if adapters: - adapters.sort(key=lambda a: a[1], reverse=True) - name, vram, hint = adapters[0] - # AdapterRAM is unreliable for >4GB cards. If exactly 4GB and modern AMD/Intel card name, bump. - if vram <= 4.1 and any(k in name.lower() for k in ("rx 6", "rx 7", "arc a", "arc b")): - vram = 8.0 # conservative guess - if hint in ("amd", "intel"): - return HardwareInfo(hint, "directml", name, vram, _vram_tier(vram)) - if hint == "nvidia": - # nvidia-smi missing but card is nvidia: drivers may be broken, fall through to directml - return HardwareInfo("nvidia", "directml", name, vram, _vram_tier(vram)) - ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 return HardwareInfo("cpu", "cpu", platform.processor() or "CPU", 0.0, "cpu") @@ -348,10 +277,6 @@ def torch_install_args(info: HardwareInfo) -> list[str]: "--index-url", "https://download.pytorch.org/whl/cu124", ] -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 if info.backend == "rocm": # ROCm wheels are Linux-only. Index pinned to a stable ROCm release line. return [ @@ -360,11 +285,6 @@ def torch_install_args(info: HardwareInfo) -> list[str]: "--index-url", "https://download.pytorch.org/whl/rocm6.1", ] -<<<<<<< HEAD -======= -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 if info.backend == "directml": # torch-directml currently pins to torch 2.4.x. Match it. 
return [ @@ -372,21 +292,12 @@ def torch_install_args(info: HardwareInfo) -> list[str]: "torchvision>=0.19,<0.20", "torch-directml>=0.2.5", ] -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 if info.backend == "mps": # Default PyPI torch wheel ships MPS support on macOS arm64. No custom index. return ["torch", "torchvision"] # CPU. macOS uses default PyPI wheels (no /whl/cpu index for darwin). if platform.system() == "Darwin": return ["torch", "torchvision"] -<<<<<<< HEAD -======= -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 return [ "torch", "torchvision", diff --git a/backends/memory.py b/backends/memory.py index 336f8a2..c794dc4 100644 --- a/backends/memory.py +++ b/backends/memory.py @@ -30,20 +30,9 @@ def apply_memory_strategy(pipe) -> None: except Exception: pass -<<<<<<< HEAD if backend in ("cuda", "rocm"): # ROCm builds expose the cuda API, so accelerate offload hooks work the same way. # Offload only if VRAM tight. -======= -<<<<<<< HEAD - if backend in ("cuda", "rocm"): - # ROCm builds expose the cuda API, so accelerate offload hooks work the same way. - # Offload only if VRAM tight. -======= - if backend == "cuda": - # Offload only if VRAM tight. cpu_offload is CUDA-only via accelerate hooks. ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 if vram < 10: try: pipe.enable_sequential_cpu_offload() @@ -68,10 +57,6 @@ def apply_memory_strategy(pipe) -> None: pipe.to("cpu") return -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 if backend == "mps": # Apple Silicon shares unified memory with CPU. accelerate's sequential offload # has spotty MPS support; rely on slicing/tiling already enabled above. 
@@ -94,10 +79,5 @@ def apply_memory_strategy(pipe) -> None: pipe.to("cpu") return -<<<<<<< HEAD -======= -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 # CPU pipe.to("cpu") diff --git a/launcher.py b/launcher.py index 4385eb4..477459f 100644 --- a/launcher.py +++ b/launcher.py @@ -11,14 +11,7 @@ uv install strategy (in order): """ from __future__ import annotations -<<<<<<< HEAD import argparse -======= -<<<<<<< HEAD -import argparse -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 import io import json import os @@ -214,10 +207,6 @@ def _uv_pip(uv: list[str], args: list[str]) -> None: subprocess.check_call(cmd) -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 def detect_and_install( uv: list[str], force_backend: str | None = None, @@ -232,17 +221,6 @@ def detect_and_install( f"[kawai] Backend: {info.backend}{forced_note} | " f"{info.vendor} / {info.device_name} / {info.vram_gb:.1f} GB / tier={info.tier}" ) -<<<<<<< HEAD -======= -======= -def detect_and_install(uv: list[str]) -> dict: - sys.path.insert(0, str(ROOT)) - from backends import hardware - - info = hardware.detect() - print(f"[kawai] Detected: {info.vendor} / {info.device_name} / {info.vram_gb:.1f} GB / tier={info.tier}") ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 _uv_pip(uv, hardware.torch_install_args(info)) _uv_pip(uv, ["-r", str(ROOT / "requirements.txt")]) @@ -253,14 +231,7 @@ def detect_and_install(uv: list[str]) -> dict: "device_name": info.device_name, "vram_gb": info.vram_gb, "tier": info.tier, -<<<<<<< HEAD "forced": bool(force_backend and force_backend != "auto"), -======= -<<<<<<< HEAD - "forced": bool(force_backend and force_backend != "auto"), -======= ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 } HARDWARE_CACHE.write_text(json.dumps(payload, indent=2)) MARKER.write_text("ok") @@ -274,24 +245,12 @@ def already_in_venv() -> bool: return False -<<<<<<< HEAD def relaunch_in_venv(forwarded_args: list[str]) -> None: -======= -<<<<<<< HEAD -def relaunch_in_venv(forwarded_args: list[str]) -> None: -======= -def relaunch_in_venv() -> None: ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 """Re-exec the launcher inside the venv. 
Use subprocess on Windows because os.execv mangles argv with spaces in paths.""" print("[kawai] Relaunching inside venv...") py = str(venv_python()) script = str(ROOT / "launcher.py") -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 argv = [py, script, *forwarded_args] if os.name == "nt": result = subprocess.run(argv) @@ -340,23 +299,6 @@ def main() -> None: if not MARKER.exists(): uv = _ensure_uv() detect_and_install(uv, force_backend=forced, force_vendor=args.vendor) -<<<<<<< HEAD -======= -======= - if os.name == "nt": - result = subprocess.run([py, script]) - sys.exit(result.returncode) - else: - os.execv(py, [py, script]) - - -def main() -> None: - if already_in_venv(): - if not MARKER.exists(): - uv = _ensure_uv() - detect_and_install(uv) ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 from app import run run() return @@ -364,18 +306,8 @@ def main() -> None: uv = _ensure_uv() _create_venv(uv) if not MARKER.exists(): -<<<<<<< HEAD detect_and_install(uv, force_backend=forced, force_vendor=args.vendor) relaunch_in_venv(sys.argv[1:]) -======= -<<<<<<< HEAD - detect_and_install(uv, force_backend=forced, force_vendor=args.vendor) - relaunch_in_venv(sys.argv[1:]) -======= - detect_and_install(uv) - relaunch_in_venv() ->>>>>>> refs/remotes/azuze/main ->>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9 if __name__ == "__main__":
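
A note on the `# ROCm builds of torch expose the cuda namespace.` comment kept in `backends/device.py`: the resolved branch routes both the `cuda` and `rocm` backends through `torch.device("cuda")`. The sketch below sits outside the patch (`describe_accelerator` is a hypothetical helper, not repo code) and shows how to confirm at runtime which flavor is actually installed, using `torch.version.hip`, which is a version string on ROCm wheels and `None` on CUDA wheels.

```python
# Illustrative only — not part of the diff. On ROCm wheels torch.cuda.is_available()
# returns True and torch.version.hip is set, so code written against the "cuda"
# namespace (as in backends/device.py) runs unchanged on AMD GPUs.
import torch

def describe_accelerator() -> str:  # hypothetical helper for illustration
    if torch.cuda.is_available():
        flavor = "rocm" if getattr(torch.version, "hip", None) else "cuda"
        return f"{flavor}: {torch.cuda.get_device_name(0)}"
    mps = getattr(torch.backends, "mps", None)
    if mps is not None and torch.backends.mps.is_available():
        return "mps: Apple unified memory"
    return "cpu"

print(describe_accelerator())
```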
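The launcher hunks wire `hardware.detect()` into `_uv_pip(uv, hardware.torch_install_args(info))`. A minimal dry-run sketch of that flow, assuming it is run from the repo root against the resolved `backends/hardware.py` (it only prints; nothing is installed):

```python
# Preview the wheel selection the launcher performs, without installing anything.
from backends import hardware

info = hardware.detect()  # or: hardware.detect(force_backend="directml", force_vendor="intel")
print(f"{info.vendor} / {info.backend} / {info.device_name} / {info.vram_gb:.1f} GB / tier={info.tier}")
print("would run: uv pip install", " ".join(hardware.torch_install_args(info)))
```

Per the resolved `torch_install_args`, that maps to the cu124 index for NVIDIA, the rocm6.1 index for AMD on Linux, `torch>=2.4,<2.5` plus `torch-directml` for AMD/Intel on Windows, and plain PyPI wheels for MPS and macOS CPU.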
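The `backends/memory.py` hunks keep sequential CPU offload for CUDA/ROCm when VRAM is under 10 GB, while the MPS branch relies on the slicing/tiling already enabled earlier in the function. A standalone sketch of the same diffusers knobs, assuming a hypothetical `model_id` and that `diffusers` and `accelerate` are installed; this is not the repo's code path:

```python
# Sketch of the memory knobs apply_memory_strategy leans on, outside the repo.
import torch
from diffusers import StableDiffusionPipeline

model_id = "some/sd-checkpoint"  # hypothetical placeholder, not a real default
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)

pipe.enable_attention_slicing()       # lower peak activation memory
pipe.enable_vae_tiling()              # decode large images in tiles
pipe.enable_sequential_cpu_offload()  # gated to CUDA/ROCm with <10 GB VRAM in the diff
```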