"""Gradio UI. Two tabs: Image, Video. Auto-picks defaults from detected hardware.""" from __future__ import annotations import json import random import time from pathlib import Path import gradio as gr from backends import models, refiner, safety from backends.device import hardware_info ROOT = Path(__file__).parent OUTPUTS = ROOT / "outputs" OUTPUTS.mkdir(exist_ok=True) CONFIG = json.loads((ROOT / "config.json").read_text()) def _hw_summary() -> str: hw = hardware_info() return ( f"**{hw['device_name']}** — {hw['vendor'].upper()} via {hw['backend']} " f"— {hw['vram_gb']:.1f} GB VRAM — tier `{hw['tier']}`" ) def _models_status_md() -> str: rows = ["| Model | Kind | Min VRAM | Download | Status |", "|---|---|---|---|---|"] for m in models.IMAGE_MODELS + models.VIDEO_MODELS: status = "cached" if models.is_cached(m) else "not downloaded" rows.append( f"| {m.label} | {m.kind} | {m.min_vram_gb:.0f} GB | {m.download_gb:.0f} GB | {status} |" ) return "### Models\n\n" + "\n".join(rows) def _model_choices(kind: str) -> tuple[list[tuple[str, str]], str | None]: hw = hardware_info() available = models.list_for_tier(hw["tier"], kind) choices = [(models.label_with_meta(m), m.id) for m in available] default = models.default_for_tier(hw["tier"], kind) return choices, (default.id if default else None) def gen_image( prompt: str, negative_prompt: str, model_id: str, width: int, height: int, steps: int, guidance: float, seed: int, auto_refine: bool, ): if not prompt.strip(): raise gr.Error("Empty prompt.") chk = safety.check_prompt(prompt) if not chk.allowed: raise gr.Error(chk.reason) spec = models.find(model_id) if spec and not models.is_cached(spec): gr.Info(f"Downloading {spec.label} (~{spec.download_gb:.0f} GB) on first use. Watch terminal for progress.") refined = prompt if auto_refine: refined = refiner.refine(prompt, use_ollama=CONFIG["refiner"]["use_ollama"]) from backends import image_sdxl seed_val = None if seed < 0 else seed if seed_val is None: seed_val = random.randint(0, 2**31 - 1) img = image_sdxl.generate( prompt=refined, negative_prompt=negative_prompt, model_id=model_id, width=int(width), height=int(height), steps=int(steps), guidance=float(guidance), seed=seed_val, ) img_chk = safety.check_image(img) if not img_chk.allowed: raise gr.Error(img_chk.reason) out_path = OUTPUTS / f"img_{int(time.time())}_{seed_val}.png" img.save(out_path) info = f"Seed: {seed_val}\n\nPrompt used:\n{refined}" return img, info def gen_video( prompt: str, negative_prompt: str, model_id: str, width: int, height: int, num_frames: int, fps: int, steps: int, guidance: float, seed: int, auto_refine: bool, ): if not prompt.strip(): raise gr.Error("Empty prompt.") chk = safety.check_prompt(prompt) if not chk.allowed: raise gr.Error(chk.reason) spec = models.find(model_id) if spec and not models.is_cached(spec): gr.Info(f"Downloading {spec.label} (~{spec.download_gb:.0f} GB) on first use. Watch terminal for progress.") refined = prompt if auto_refine: refined = refiner.refine(prompt, use_ollama=CONFIG["refiner"]["use_ollama"]) from backends import video_ltx seed_val = None if seed < 0 else seed if seed_val is None: seed_val = random.randint(0, 2**31 - 1) path = video_ltx.generate( prompt=refined, negative_prompt=negative_prompt, model_id=model_id, width=int(width), height=int(height), num_frames=int(num_frames), fps=int(fps), steps=int(steps), guidance=float(guidance), seed=seed_val, ) info = f"Seed: {seed_val}\n\nPrompt used:\n{refined}" return path, info def build_ui() -> gr.Blocks: img_choices, img_default = _model_choices("image") vid_choices, vid_default = _model_choices("video") img_def = CONFIG["image_defaults"] vid_def = CONFIG["video_defaults"] with gr.Blocks(title="Kawai", analytics_enabled=False) as ui: gr.Markdown("# Kawai\nLocal AI image and video generator.") gr.Markdown(_hw_summary()) with gr.Tabs(): with gr.Tab("Image"): with gr.Row(): with gr.Column(scale=2): i_prompt = gr.Textbox(label="Prompt", lines=3, placeholder="Describe what you want...") i_neg = gr.Textbox(label="Negative prompt", lines=2, value=img_def["negative_prompt"]) i_refine = gr.Checkbox(label="Auto-refine prompt with local LLM", value=True) i_model = gr.Dropdown(choices=img_choices, value=img_default, label="Model") with gr.Row(): i_w = gr.Slider(512, 1536, value=img_def["width"], step=64, label="Width") i_h = gr.Slider(512, 1536, value=img_def["height"], step=64, label="Height") with gr.Row(): i_steps = gr.Slider(1, 80, value=img_def["steps"], step=1, label="Steps") i_guidance = gr.Slider(0.0, 15.0, value=img_def["guidance"], step=0.1, label="Guidance") i_seed = gr.Number(value=-1, label="Seed (-1 = random)", precision=0) i_btn = gr.Button("Generate", variant="primary") with gr.Column(scale=3): i_out = gr.Image(label="Output", type="pil") i_info = gr.Textbox(label="Info", lines=6, interactive=False) i_btn.click( gen_image, inputs=[i_prompt, i_neg, i_model, i_w, i_h, i_steps, i_guidance, i_seed, i_refine], outputs=[i_out, i_info], ) with gr.Tab("Video"): if not vid_choices: gr.Markdown("**Video disabled** — detected hardware lacks VRAM for any video model.") else: with gr.Row(): with gr.Column(scale=2): v_prompt = gr.Textbox(label="Prompt", lines=3) v_neg = gr.Textbox(label="Negative prompt", lines=2, value="") v_refine = gr.Checkbox(label="Auto-refine prompt with local LLM", value=True) v_model = gr.Dropdown(choices=vid_choices, value=vid_default, label="Model") with gr.Row(): v_w = gr.Slider(384, 1024, value=vid_def["width"], step=32, label="Width") v_h = gr.Slider(256, 1024, value=vid_def["height"], step=32, label="Height") with gr.Row(): v_frames = gr.Slider(17, 161, value=vid_def["num_frames"], step=8, label="Frames") v_fps = gr.Slider(8, 30, value=vid_def["fps"], step=1, label="FPS") with gr.Row(): v_steps = gr.Slider(10, 60, value=vid_def["steps"], step=1, label="Steps") v_guidance = gr.Slider(0.0, 10.0, value=vid_def["guidance"], step=0.1, label="Guidance") v_seed = gr.Number(value=-1, label="Seed (-1 = random)", precision=0) v_btn = gr.Button("Generate", variant="primary") with gr.Column(scale=3): v_out = gr.Video(label="Output") v_info = gr.Textbox(label="Info", lines=6, interactive=False) v_btn.click( gen_video, inputs=[v_prompt, v_neg, v_model, v_w, v_h, v_frames, v_fps, v_steps, v_guidance, v_seed, v_refine], outputs=[v_out, v_info], ) with gr.Tab("System"): gr.Markdown(_hw_summary()) gr.Markdown(_models_status_md()) gr.Markdown( "**Output folder:** `outputs/`\n\n" "**Models cache:** `models/diffusers/`\n\n" "**Prompt refiner:** Ollama with `dolphin-llama3:8b` if running, else GPT-2 fallback.\n\n" "Install Ollama: https://ollama.com/ then `ollama pull dolphin-llama3`.\n\n" "**Safety:** CSAM-gated only (prompt keyword gate + face age check on nude outputs). All other content allowed.\n\n" "**Note:** First use of a model triggers download (7–24 GB). Keep this terminal open during download." ) return ui def run() -> None: ui = build_ui() ui.queue().launch( server_name=CONFIG["ui"]["host"], server_port=CONFIG["ui"]["port"], inbrowser=CONFIG["ui"]["open_browser"], ) if __name__ == "__main__": run()