Merge remote-tracking branch 'refs/remotes/azuze/main'

2026-05-04 09:50:25 +02:00
5 changed files with 118 additions and 0 deletions
+17
@@ -1,6 +1,10 @@
# Kawai
<<<<<<< HEAD
Local AI image and video generator. Simple UI. NSFW-capable. Auto GPU detection (Nvidia / AMD / Intel / Apple Silicon / CPU).
=======
Local AI image and video generator. Simple UI. NSFW-capable. Auto GPU detection (Nvidia / AMD / Intel / CPU).
>>>>>>> refs/remotes/azuze/main
## Quick start
@@ -10,6 +14,7 @@ python launcher.py
Run with **any Python** you have installed. The launcher bootstraps `uv` and uses it to fetch a clean Python 3.11 runtime + venv, then installs the right PyTorch build for your GPU. Nothing about your system Python is touched.
<<<<<<< HEAD
Works on Windows, Linux, and macOS.
First run takes a few minutes (uv install + Python 3.11 download + torch + dependencies). Subsequent runs start instantly.
@@ -29,11 +34,19 @@ python launcher.py --reinstall # wipe install marker, re-detect, reinstal
`--vendor {nvidia,amd,intel,cpu}` is available too if you need to pair (e.g. `--backend directml --vendor intel`). Override is persisted in `config.local.json` and survives relaunches until you pass `--backend` again or `--reinstall`.
=======
First run takes a few minutes (uv install + Python 3.11 download + torch + dependencies). Subsequent runs start instantly.
>>>>>>> refs/remotes/azuze/main
### What the launcher does
1. Installs `uv` to `.tools/` if not present.
2. Creates `venv/` with Python 3.11 (uv downloads the interpreter on demand).
<<<<<<< HEAD
3. Detects GPU (Nvidia / AMD / Intel / Apple Silicon / CPU) and installs matching PyTorch wheel.
=======
3. Detects GPU (Nvidia / AMD / Intel / CPU) and installs matching PyTorch wheel.
>>>>>>> refs/remotes/azuze/main
4. Installs latest `diffusers`, `transformers`, etc.
5. Opens browser UI at `http://127.0.0.1:7860`.
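For reference, a minimal sketch of how the `--backend` / `--vendor` / `--reinstall` flags mentioned above could be declared with argparse. This is not code from launcher.py: the choices mirror `SUPPORTED_BACKENDS` and the vendors named in this README, while help text and defaults are assumptions.

```python
# Hypothetical sketch of the CLI flags referenced above; not taken from launcher.py.
import argparse

parser = argparse.ArgumentParser(prog="launcher.py")
parser.add_argument("--reinstall", action="store_true",
                    help="wipe the install marker, re-detect the GPU, reinstall torch")
parser.add_argument("--backend", default="auto",
                    choices=["auto", "cuda", "rocm", "directml", "mps", "cpu"])
parser.add_argument("--vendor", choices=["nvidia", "amd", "intel", "cpu"])
args = parser.parse_args()
```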
@@ -58,4 +71,8 @@ CSAM detection on all outputs (NudeNet age classifier + hash check). All other c
## Status
<<<<<<< HEAD
Windows + Linux + macOS. AMD on Linux uses ROCm; AMD/Intel on Windows use DirectML; Apple Silicon uses MPS. Intel Macs run on CPU only (no GPU acceleration path).
=======
Windows only. Linux support planned.
>>>>>>> refs/remotes/azuze/main
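A hedged sketch of how the persisted override described in the HEAD side of this README might be read back on a later launch. Only the file name `config.local.json` comes from the text above; the key names are assumptions.

```python
# Sketch only: key names ("backend", "vendor") are assumed, not taken from the repo.
import json
from pathlib import Path

def load_backend_override(root: Path) -> tuple[str | None, str | None]:
    cfg = root / "config.local.json"
    if not cfg.exists():
        return None, None
    data = json.loads(cfg.read_text())
    return data.get("backend"), data.get("vendor")
```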
+10
@@ -28,8 +28,12 @@ def hardware_info() -> dict:
def get_device():
import torch
backend = hardware_info()["backend"]
<<<<<<< HEAD
# ROCm builds of torch expose the cuda namespace.
if backend in ("cuda", "rocm") and torch.cuda.is_available():
=======
if backend == "cuda" and torch.cuda.is_available():
>>>>>>> refs/remotes/azuze/main
return torch.device("cuda")
if backend == "directml":
try:
@@ -37,8 +41,11 @@ def get_device():
return torch_directml.device()
except ImportError:
pass
<<<<<<< HEAD
if backend == "mps" and getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
return torch.device("mps")
=======
>>>>>>> refs/remotes/azuze/main
return torch.device("cpu")
@@ -47,5 +54,8 @@ def torch_dtype():
backend = hardware_info()["backend"]
if backend == "cpu":
return torch.float32
<<<<<<< HEAD
# MPS supports fp16 for diffusers; bf16 has gaps. Stick with fp16.
=======
>>>>>>> refs/remotes/azuze/main
return torch.float16
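A usage sketch pairing the two helpers above with a diffusers pipeline. The import path and model id are placeholders (the file's real module path is not shown in this diff); the diffusers calls themselves are standard API.

```python
# Usage sketch; "device_helpers" and the model id are placeholders, not repo names.
from diffusers import StableDiffusionPipeline
from device_helpers import get_device, torch_dtype  # hypothetical import path

def load_pipeline(model_id: str = "runwayml/stable-diffusion-v1-5"):
    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype())
    return pipe.to(get_device())  # cuda (also ROCm builds), directml, mps, or cpu
```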
+44
@@ -1,14 +1,23 @@
"""GPU and VRAM detection. Returns vendor + tier used to pick torch wheel and default models.""" """GPU and VRAM detection. Returns vendor + tier used to pick torch wheel and default models."""
from __future__ import annotations from __future__ import annotations
<<<<<<< HEAD
=======
import ctypes
>>>>>>> refs/remotes/azuze/main
import platform
import subprocess
from dataclasses import dataclass
from typing import Literal
<<<<<<< HEAD
Vendor = Literal["nvidia", "amd", "intel", "apple", "cpu"]
Backend = Literal["cuda", "rocm", "directml", "mps", "cpu"]
SUPPORTED_BACKENDS: tuple[str, ...] = ("auto", "cuda", "rocm", "directml", "mps", "cpu")
=======
Vendor = Literal["nvidia", "amd", "intel", "cpu"]
Backend = Literal["cuda", "directml", "cpu"]
>>>>>>> refs/remotes/azuze/main
@dataclass
@@ -20,6 +29,7 @@ class HardwareInfo:
tier: Literal["cpu", "low", "mid", "high", "ultra"]
<<<<<<< HEAD
def _detect_mac_gpu() -> tuple[str, float] | None:
"""Apple Silicon: report chip name + unified memory (proxy for VRAM).
Intel Mac: returns None (no GPU acceleration path)."""
@@ -44,6 +54,8 @@ def _detect_mac_gpu() -> tuple[str, float] | None:
return name, vram_gb
=======
>>>>>>> refs/remotes/azuze/main
def _run(cmd: list[str]) -> str:
try:
out = subprocess.run(cmd, capture_output=True, text=True, timeout=10, check=False)
@@ -100,6 +112,7 @@ def _detect_dxgi() -> list[tuple[str, float, str]]:
return results
<<<<<<< HEAD
def _detect_linux_gpus() -> list[tuple[str, float, str]]:
"""Enumerate Linux GPUs. Returns list of (name, vram_gb, vendor_hint)."""
if platform.system() != "Linux":
@@ -160,6 +173,8 @@ def _vendor_from_backend(backend: str) -> Vendor:
}.get(backend, "cpu") # type: ignore[return-value]
=======
>>>>>>> refs/remotes/azuze/main
def _vram_tier(vram_gb: float) -> Literal["cpu", "low", "mid", "high", "ultra"]:
if vram_gb < 1:
return "cpu"
@@ -172,6 +187,7 @@ def _vram_tier(vram_gb: float) -> Literal["cpu", "low", "mid", "high", "ultra"]:
return "ultra" return "ultra"
<<<<<<< HEAD
def detect(force_backend: str | None = None, force_vendor: str | None = None) -> HardwareInfo:
"""Auto-detect hardware. If force_backend is set (cuda/rocm/directml/cpu), skip detection
for that decision but still try to discover device name + VRAM for tier sizing."""
@@ -227,10 +243,15 @@ def detect(force_backend: str | None = None, force_vendor: str | None = None) ->
name, vram = mac_gpu
return HardwareInfo("apple", "mps", name, vram, _vram_tier(vram))
=======
def detect() -> HardwareInfo:
nv = _detect_nvidia()
>>>>>>> refs/remotes/azuze/main
if nv:
name, vram = nv
return HardwareInfo("nvidia", "cuda", name, vram, _vram_tier(vram))
<<<<<<< HEAD
if is_windows and win_adapters:
win_adapters.sort(key=lambda a: a[1], reverse=True)
name, vram, hint = win_adapters[0]
@@ -254,6 +275,23 @@ def detect(force_backend: str | None = None, force_vendor: str | None = None) ->
# No good Intel-on-Linux torch path here; default to CPU.
return HardwareInfo("cpu", "cpu", f"Intel GPU (no backend) — {name}", 0.0, "cpu")
=======
adapters = _detect_dxgi()
# Prefer discrete (highest VRAM) non-basic adapter
adapters = [a for a in adapters if "basic" not in a[0].lower() and "microsoft" not in a[0].lower()]
if adapters:
adapters.sort(key=lambda a: a[1], reverse=True)
name, vram, hint = adapters[0]
# AdapterRAM is unreliable for >4GB cards. If exactly 4GB and modern AMD/Intel card name, bump.
if vram <= 4.1 and any(k in name.lower() for k in ("rx 6", "rx 7", "arc a", "arc b")):
vram = 8.0 # conservative guess
if hint in ("amd", "intel"):
return HardwareInfo(hint, "directml", name, vram, _vram_tier(vram))
if hint == "nvidia":
# nvidia-smi missing but card is nvidia: drivers may be broken, fall through to directml
return HardwareInfo("nvidia", "directml", name, vram, _vram_tier(vram))
>>>>>>> refs/remotes/azuze/main
return HardwareInfo("cpu", "cpu", platform.processor() or "CPU", 0.0, "cpu") return HardwareInfo("cpu", "cpu", platform.processor() or "CPU", 0.0, "cpu")
@@ -277,6 +315,7 @@ def torch_install_args(info: HardwareInfo) -> list[str]:
"--index-url", "--index-url",
"https://download.pytorch.org/whl/cu124", "https://download.pytorch.org/whl/cu124",
] ]
<<<<<<< HEAD
if info.backend == "rocm": if info.backend == "rocm":
# ROCm wheels are Linux-only. Index pinned to a stable ROCm release line. # ROCm wheels are Linux-only. Index pinned to a stable ROCm release line.
return [ return [
@@ -285,6 +324,8 @@ def torch_install_args(info: HardwareInfo) -> list[str]:
"--index-url", "--index-url",
"https://download.pytorch.org/whl/rocm6.1", "https://download.pytorch.org/whl/rocm6.1",
] ]
=======
>>>>>>> refs/remotes/azuze/main
if info.backend == "directml": if info.backend == "directml":
# torch-directml currently pins to torch 2.4.x. Match it. # torch-directml currently pins to torch 2.4.x. Match it.
return [ return [
@@ -292,12 +333,15 @@ def torch_install_args(info: HardwareInfo) -> list[str]:
"torchvision>=0.19,<0.20", "torchvision>=0.19,<0.20",
"torch-directml>=0.2.5", "torch-directml>=0.2.5",
] ]
<<<<<<< HEAD
if info.backend == "mps": if info.backend == "mps":
# Default PyPI torch wheel ships MPS support on macOS arm64. No custom index. # Default PyPI torch wheel ships MPS support on macOS arm64. No custom index.
return ["torch", "torchvision"] return ["torch", "torchvision"]
# CPU. macOS uses default PyPI wheels (no /whl/cpu index for darwin). # CPU. macOS uses default PyPI wheels (no /whl/cpu index for darwin).
if platform.system() == "Darwin": if platform.system() == "Darwin":
return ["torch", "torchvision"] return ["torch", "torchvision"]
=======
>>>>>>> refs/remotes/azuze/main
return [
"torch",
"torchvision",
+8
@@ -30,9 +30,14 @@ def apply_memory_strategy(pipe) -> None:
except Exception:
pass
<<<<<<< HEAD
if backend in ("cuda", "rocm"): if backend in ("cuda", "rocm"):
# ROCm builds expose the cuda API, so accelerate offload hooks work the same way. # ROCm builds expose the cuda API, so accelerate offload hooks work the same way.
# Offload only if VRAM tight. # Offload only if VRAM tight.
=======
if backend == "cuda":
# Offload only if VRAM tight. cpu_offload is CUDA-only via accelerate hooks.
>>>>>>> refs/remotes/azuze/main
if vram < 10:
try:
pipe.enable_sequential_cpu_offload()
@@ -57,6 +62,7 @@ def apply_memory_strategy(pipe) -> None:
pipe.to("cpu") pipe.to("cpu")
return return
<<<<<<< HEAD
if backend == "mps": if backend == "mps":
# Apple Silicon shares unified memory with CPU. accelerate's sequential offload # Apple Silicon shares unified memory with CPU. accelerate's sequential offload
# has spotty MPS support; rely on slicing/tiling already enabled above. # has spotty MPS support; rely on slicing/tiling already enabled above.
@@ -79,5 +85,7 @@ def apply_memory_strategy(pipe) -> None:
pipe.to("cpu") pipe.to("cpu")
return return
=======
>>>>>>> refs/remotes/azuze/main
# CPU
pipe.to("cpu")
+39
@@ -11,7 +11,10 @@ uv install strategy (in order):
""" """
from __future__ import annotations from __future__ import annotations
<<<<<<< HEAD
import argparse
=======
>>>>>>> refs/remotes/azuze/main
import io
import json
import os
@@ -207,6 +210,7 @@ def _uv_pip(uv: list[str], args: list[str]) -> None:
subprocess.check_call(cmd)
<<<<<<< HEAD
def detect_and_install(
uv: list[str],
force_backend: str | None = None,
@@ -221,6 +225,14 @@ def detect_and_install(
f"[kawai] Backend: {info.backend}{forced_note} | " f"[kawai] Backend: {info.backend}{forced_note} | "
f"{info.vendor} / {info.device_name} / {info.vram_gb:.1f} GB / tier={info.tier}" f"{info.vendor} / {info.device_name} / {info.vram_gb:.1f} GB / tier={info.tier}"
) )
=======
def detect_and_install(uv: list[str]) -> dict:
sys.path.insert(0, str(ROOT))
from backends import hardware
info = hardware.detect()
print(f"[kawai] Detected: {info.vendor} / {info.device_name} / {info.vram_gb:.1f} GB / tier={info.tier}")
>>>>>>> refs/remotes/azuze/main
_uv_pip(uv, hardware.torch_install_args(info))
_uv_pip(uv, ["-r", str(ROOT / "requirements.txt")])
@@ -231,7 +243,10 @@ def detect_and_install(
"device_name": info.device_name, "device_name": info.device_name,
"vram_gb": info.vram_gb, "vram_gb": info.vram_gb,
"tier": info.tier, "tier": info.tier,
<<<<<<< HEAD
"forced": bool(force_backend and force_backend != "auto"), "forced": bool(force_backend and force_backend != "auto"),
=======
>>>>>>> refs/remotes/azuze/main
}
HARDWARE_CACHE.write_text(json.dumps(payload, indent=2))
MARKER.write_text("ok")
@@ -245,12 +260,17 @@ def already_in_venv() -> bool:
return False
<<<<<<< HEAD
def relaunch_in_venv(forwarded_args: list[str]) -> None:
=======
def relaunch_in_venv() -> None:
>>>>>>> refs/remotes/azuze/main
"""Re-exec the launcher inside the venv. Use subprocess on Windows because """Re-exec the launcher inside the venv. Use subprocess on Windows because
os.execv mangles argv with spaces in paths.""" os.execv mangles argv with spaces in paths."""
print("[kawai] Relaunching inside venv...") print("[kawai] Relaunching inside venv...")
py = str(venv_python()) py = str(venv_python())
script = str(ROOT / "launcher.py") script = str(ROOT / "launcher.py")
<<<<<<< HEAD
argv = [py, script, *forwarded_args]
if os.name == "nt":
result = subprocess.run(argv)
@@ -299,6 +319,20 @@ def main() -> None:
if not MARKER.exists():
uv = _ensure_uv()
detect_and_install(uv, force_backend=forced, force_vendor=args.vendor)
=======
if os.name == "nt":
result = subprocess.run([py, script])
sys.exit(result.returncode)
else:
os.execv(py, [py, script])
def main() -> None:
if already_in_venv():
if not MARKER.exists():
uv = _ensure_uv()
detect_and_install(uv)
>>>>>>> refs/remotes/azuze/main
from app import run
run()
return
@@ -306,8 +340,13 @@ def main() -> None:
uv = _ensure_uv()
_create_venv(uv)
if not MARKER.exists():
<<<<<<< HEAD
detect_and_install(uv, force_backend=forced, force_vendor=args.vendor)
relaunch_in_venv(sys.argv[1:])
=======
detect_and_install(uv)
relaunch_in_venv()
>>>>>>> refs/remotes/azuze/main
if __name__ == "__main__":
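Finally, a small sketch of the relaunch pattern that `relaunch_in_venv()`'s docstring describes: spawn a subprocess on Windows (where `os.execv` mishandles argv entries containing spaces) and exec in place elsewhere. Names here are generic, not the launcher's.

```python
# Generic sketch of the Windows-vs-POSIX relaunch pattern described above.
import os
import subprocess
import sys

def reexec(python: str, script: str, forwarded: list[str]) -> None:
    argv = [python, script, *forwarded]
    if os.name == "nt":
        sys.exit(subprocess.run(argv).returncode)  # subprocess keeps spaced paths intact
    os.execv(python, argv)  # replace the current process on POSIX
```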