nixos/hosts/architect/llm.nix

57 lines
1.2 KiB
Nix
Raw Normal View History

{ config, pkgs, ... }:
2023-11-16 12:25:58 +00:00
let
2024-03-14 11:55:56 +00:00
# Ollama build used both as the service package and as a system-wide package.
ollamaPkg = pkgs.unstablePkgs.ollama-cuda;
# Domain of the vhost that proxies to the open-webui service.
frontendDomain = "llm.giugl.io";
# Domain of the vhost that proxies to the ollama service.
backendDomain = "ollama.giugl.io";
2023-11-16 12:25:58 +00:00
in
{
2024-03-14 11:55:56 +00:00
# Put the ollama package on the system profile (same derivation the service uses).
environment.systemPackages = [ ollamaPkg ];
2025-03-10 12:30:55 +00:00
# Ollama service, using the package selected in the enclosing `let`.
services.ollama = {
  enable = true;
  package = ollamaPkg;
  acceleration = "cuda";
  # Passed to the ollama process as environment variables; see the Ollama
  # documentation for the exact semantics of each setting.
  environmentVariables = {
    OLLAMA_FLASH_ATTENTION = "1";
    OLLAMA_NUM_PARALLEL = "2";
    OLLAMA_KV_CACHE_TYPE = "q8_0";
  };
};

# Open WebUI service; exposed through the frontendDomain vhost.
services.open-webui.enable = true;
2024-03-14 11:55:56 +00:00
# Virtual host for backendDomain, forwarding "/" to the ollama service.
# NOTE(review): `architect.vhost` is a module defined outside this file; the
# meaning of dnsInterfaces / allowLan / allowWAN is assumed from their names —
# confirm against that module.
architect.vhost.${backendDomain} = {
  dnsInterfaces = [ "tailscale" "lan" ];
  locations."/" = {
    # Proxy target comes from the ollama service's own host/port options.
    host = config.services.ollama.host;
    port = config.services.ollama.port;
    allowLan = true;
    allowWAN = true;
    # Presumably disabled because the Host header is overridden by hand
    # in extraConfig below — TODO confirm.
    recommendedProxySettings = false;
    # The Host header must read "localhost:<port>". It previously interpolated
    # `services.ollama.host`, which produced "localhost:<bind address>".
    extraConfig = ''
      proxy_buffering off;
      proxy_read_timeout 600s;
      proxy_set_header Host localhost:${toString config.services.ollama.port};
    '';
  };
};
2025-03-10 12:30:55 +00:00
# Virtual host for frontendDomain, forwarding "/" to the open-webui service.
# NOTE(review): option semantics come from the `architect.vhost` module, which
# is not defined in this file — confirm there.
architect.vhost.${frontendDomain} = {
  dnsInterfaces = [ "tailscale" "lan" ];
  locations."/" = {
    # Reuse the host and port the open-webui service is configured with.
    inherit (config.services.open-webui) host port;
    allowLan = true;
    allowWAN = true;
  };
};
2023-11-16 12:25:58 +00:00
}