{ config, pkgs, ... }:

let
  # Ollama build taken from the unstable package set; used both as the
  # service package and as a system-wide package below.
  ollamaPkg = pkgs.unstablePkgs.ollama-cuda;

  # Virtual host names: the first fronts the Ollama API, the second fronts
  # the Open WebUI frontend.
  backendDomain = "ollama.giugl.io";
  frontendDomain = "llm.giugl.io";
in
{
  # Install the same Ollama package system-wide that the service runs.
  environment.systemPackages = [ ollamaPkg ];

  # Ollama service, using the package selected in the let-binding above
  # with CUDA acceleration.
  services.ollama = {
    enable = true;
    package = ollamaPkg;
    acceleration = "cuda";

    # Environment variables passed to the Ollama service.
    environmentVariables = {
      OLLAMA_FLASH_ATTENTION = "1";
      OLLAMA_NUM_PARALLEL = "2";
      OLLAMA_KV_CACHE_TYPE = "q8_0";
    };
  };

  # Open WebUI with the module's default settings.
  services.open-webui.enable = true;

  # Reverse-proxy vhost for the Ollama API: "/" is forwarded to the host and
  # port that the services.ollama module is configured with.
  architect.vhost.${backendDomain} = {
    dnsInterfaces = [ "tailscale" "lan" ];

    locations."/" = {
      host = config.services.ollama.host;
      port = config.services.ollama.port;
      allowLan = true;
      # NOTE(review): this exposes the Ollama API on the WAN side as well;
      # confirm that is intended, given what architect.vhost does with it.
      allowWAN = true;
      # NOTE(review): presumably disabled so the explicit Host header below
      # is the one sent upstream — confirm against the architect.vhost module.
      recommendedProxySettings = false;
      # Turn off response buffering and allow up to 600s between upstream
      # reads. The Host header is rewritten to "localhost:<ollama port>";
      # it previously interpolated the listen *address* after the colon,
      # producing a malformed value such as "localhost:127.0.0.1".
      extraConfig = ''
        proxy_buffering off;
        proxy_read_timeout 600s;
        proxy_set_header Host localhost:${toString config.services.ollama.port};
      '';
    };
  };

  # Reverse-proxy vhost for Open WebUI: "/" is forwarded to the host and port
  # that the services.open-webui module is configured with.
  architect.vhost.${frontendDomain} = {
    dnsInterfaces = [ "tailscale" "lan" ];

    locations."/" = {
      # Upstream address comes straight from the open-webui service options.
      inherit (config.services.open-webui) host port;

      allowLan = true;
      allowWAN = true;
    };
  };
}