Merge branch 'main' of https://git.azuze.fr/kawa/KawAI

2026-05-04 10:03:38 +02:00
parent 3f6cdffe0e 965a3d97c6
commit 0b670cbd24
5 changed files with 236 additions and 0 deletions
@@ -30,9 +30,20 @@ def apply_memory_strategy(pipe) -> None:
        except Exception:
            pass

+<<<<<<< HEAD
    if backend in ("cuda", "rocm"):
        # ROCm builds expose the cuda API, so accelerate offload hooks work the same way.
        # Offload only if VRAM tight.
+=======
+<<<<<<< HEAD
+    if backend in ("cuda", "rocm"):
+        # ROCm builds expose the cuda API, so accelerate offload hooks work the same way.
+        # Offload only if VRAM tight.
+=======
+    if backend == "cuda":
+        # Offload only if VRAM tight. cpu_offload is CUDA-only via accelerate hooks.
+>>>>>>> refs/remotes/azuze/main
+>>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9
        if vram < 10:
            try:
                pipe.enable_sequential_cpu_offload()
@@ -57,6 +68,10 @@ def apply_memory_strategy(pipe) -> None:
            pipe.to("cpu")
        return

+<<<<<<< HEAD
+=======
+<<<<<<< HEAD
+>>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9
    if backend == "mps":
        # Apple Silicon shares unified memory with CPU. accelerate's sequential offload
        # has spotty MPS support; rely on slicing/tiling already enabled above.
@@ -79,5 +94,10 @@ def apply_memory_strategy(pipe) -> None:
            pipe.to("cpu")
        return

+<<<<<<< HEAD
+=======
+=======
+>>>>>>> refs/remotes/azuze/main
+>>>>>>> 965a3d97c6dae38fa25174559b1ea0f3050788f9
    # CPU
    pipe.to("cpu")