- Updated `services.ollama` to include an `environmentVariables` attribute set
- Configured the `OLLAMA_ORIGINS`, `OLLAMA_FLASH_ATTENTION`, and `OLLAMA_NUM_PARALLEL` variables
- Changed `OLLAMA_NUM_PARALLEL` from "3" to "2"
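The relevant part of the change, in isolation (a minimal sketch lifted from the full module below; the surrounding proxy and container setup is unchanged):

```nix
{
  # Extra environment passed to the ollama systemd service by the NixOS module.
  services.ollama.environmentVariables = {
    OLLAMA_ORIGINS = "*";          # allow requests from any origin (CORS)
    OLLAMA_FLASH_ATTENTION = "1";  # enable flash attention
    OLLAMA_NUM_PARALLEL = "2";     # serve at most two requests per model in parallel (was "3")
  };
}
```

Lowering `OLLAMA_NUM_PARALLEL` trades request concurrency for a smaller memory footprint, since each parallel slot reserves its own slice of the context window.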
{ pkgs, ... }:

let
  frontendDomain = "pino.giugl.io";  # Open WebUI frontend
  backendDomain = "ollama.giugl.io"; # Ollama API backend
  frontendPort = 3030;
  ollamaPort = 11434;
  listenAddress = "127.0.0.1:${toString ollamaPort}";
  ollamaPkg = pkgs.unstablePkgs.ollama; # Ollama from the unstable package set
in
{
  environment = {
    systemPackages = [ ollamaPkg ];
  };

  # Ollama server: CUDA-accelerated, listening only on localhost and
  # exposed to clients through the reverse-proxy vhosts below.
  services.ollama = {
    inherit listenAddress;

    enable = true;
    acceleration = "cuda";
    package = ollamaPkg;
    environmentVariables = {
      OLLAMA_ORIGINS = "*";
      OLLAMA_FLASH_ATTENTION = "1";
      OLLAMA_NUM_PARALLEL = "2";
    };
  };

  # Reverse-proxy vhost for the Open WebUI frontend
  # (architect.vhost is a custom option defined elsewhere in this configuration).
  architect.vhost.${frontendDomain} = {
    dnsInterfaces = [ "tailscale" ];

    locations."/" = {
      host = "127.0.0.1";
      port = frontendPort;
      allowLan = true;
      allowWAN = true;
      extraConfig = ''
        proxy_read_timeout 600s;
      '';
    };
  };

  # Reverse-proxy vhost for the Ollama API itself.
  architect.vhost.${backendDomain} = {
    dnsInterfaces = [ "tailscale" ];

    locations."/" = {
      host = "127.0.0.1";
      port = ollamaPort;
      allowLan = true;
      allowWAN = true;
      extraConfig = ''
        proxy_buffering off;
        proxy_read_timeout 600s;
        proxy_set_header Host localhost:${toString ollamaPort};
      '';
    };
  };

  # Open WebUI container, published only on localhost and pointed at the
  # proxied Ollama backend.
  virtualisation.oci-containers = {
    containers = {
      ollama-webui = {
        image = "ghcr.io/open-webui/open-webui:main";
        autoStart = true;

        ports = [
          "127.0.0.1:${toString frontendPort}:8080"
        ];

        environment = {
          OLLAMA_BASE_URL = "https://${backendDomain}";
        };

        extraOptions = [
          "--pull=always"
        ];
        volumes = [
          "/var/lib/ollama-webui:/app/backend/data"
        ];
      };
    };
  };
}
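`pkgs.unstablePkgs` is not a stock nixpkgs attribute, so it presumably comes from an overlay defined elsewhere in this configuration. A minimal sketch of one way to provide it, assuming a flake input named `nixpkgs-unstable` and that `inputs` reaches modules via `specialArgs` (both assumptions, not shown here):

```nix
# Hypothetical overlay exposing an unstable package set as pkgs.unstablePkgs.
# Adapt the input name to however this configuration actually pins its channels.
{ inputs, ... }:
{
  nixpkgs.overlays = [
    (final: prev: {
      unstablePkgs = import inputs.nixpkgs-unstable {
        inherit (prev) system;
        config.allowUnfree = true; # the CUDA toolchain in nixpkgs is unfree
      };
    })
  ];
}
```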