# NixOS module: runs Ollama (CUDA build) together with Open WebUI and
# publishes each one through its own `architect.vhost` entry.
#
# Arguments:
#   config - the evaluated system configuration; used to read the host/port
#            the ollama and open-webui services are configured with.
#   pkgs   - the package set; must provide `unstablePkgs.ollama-cuda`.
{ config, pkgs, ... }:
let
  # Public names for the raw Ollama API and for the web frontend.
  backendDomain = "ollama.giugl.io";
  frontendDomain = "llm.giugl.io";

  # Single package reference shared by the service and the system profile so
  # the CLI and the daemon are always the same build.
  ollamaPkg = pkgs.unstablePkgs.ollama-cuda;
in
{
  environment = {
    systemPackages = [ ollamaPkg ];
  };

  services = {
    ollama = {
      enable = true;
      package = ollamaPkg;
      acceleration = "cuda";

      # Passed to the Ollama daemon's environment verbatim; see the Ollama
      # documentation for the meaning of each variable.
      environmentVariables = {
        OLLAMA_FLASH_ATTENTION = "1";
        OLLAMA_NUM_PARALLEL = "2";
        OLLAMA_KV_CACHE_TYPE = "q8_0";
      };
    };

    open-webui.enable = true;
  };

  # Reverse proxy in front of the Ollama HTTP API.
  architect.vhost.${backendDomain} = {
    dnsInterfaces = [
      "tailscale"
      "lan"
    ];

    locations."/" = {
      host = config.services.ollama.host;
      port = config.services.ollama.port;
      allowLan = true;
      # NOTE(review): the Ollama API has no built-in authentication; confirm
      # that WAN exposure is intended for this vhost.
      allowWAN = true;
      # Presumably disabled so the Host header set below is not overridden by
      # the recommended proxy headers - TODO confirm against the vhost module.
      recommendedProxySettings = false;
      # Streaming responses: do not buffer, and allow long generations.
      # The Host header must be `localhost:<port>`; it previously interpolated
      # the service *host* where the port belongs (e.g. `localhost:127.0.0.1`).
      extraConfig = ''
        proxy_buffering off;
        proxy_read_timeout 600s;
        proxy_set_header Host localhost:${toString config.services.ollama.port};
      '';
    };
  };

  # Reverse proxy in front of the Open WebUI frontend.
  architect.vhost.${frontendDomain} = {
    dnsInterfaces = [
      "tailscale"
      "lan"
    ];

    locations."/" = {
      host = config.services.open-webui.host;
      port = config.services.open-webui.port;
      allowLan = true;
      allowWAN = true;
    };
  };
}