From 32dc2af002e62cbfd4e7097e0f144dda9802fcc0 Mon Sep 17 00:00:00 2001
From: Giulio De Pasquale
Date: Thu, 14 Mar 2024 11:55:56 +0000
Subject: [PATCH] ollama: use native ollama

---
 hosts/architect/llm.nix | 61 +++++++++++++++++++++++++++----------------------------------
 1 file changed, 27 insertions(+), 34 deletions(-)

diff --git a/hosts/architect/llm.nix b/hosts/architect/llm.nix
index be2f582..a1584b3 100644
--- a/hosts/architect/llm.nix
+++ b/hosts/architect/llm.nix
@@ -1,38 +1,51 @@
 { config, pkgs, ... }:
 
 let
-  domain = "pino.giugl.io";
-  backendPort = 8080;
+  frontendDomain = "pino.giugl.io";
+  backendDomain = "ollama.giugl.io";
   frontendPort = 3030;
-  # llama-cpp = pkgs.unstablePkgs.llama-cpp.override { cudaSupport = true; };
-  # ollama = pkgs.unstablePkgs.ollama.override { inherit llama-cpp; };
+  ollamaPort = 11434;
+  listenAddress = "127.0.0.1:${toString ollamaPort}";
+  ollamaPkg = pkgs.unstablePkgs.ollama;
 in
 {
-  # environment.systemPackages = [ ollama ];
-  architect.vhost.${domain} = {
+  environment = {
+    systemPackages = [ ollamaPkg ];
+    variables = {
+      OLLAMA_ORIGINS = "*";
+    };
+  };
+
+  services.ollama = {
+    inherit listenAddress;
+
+    enable = true;
+    acceleration = "cuda";
+    package = ollamaPkg;
+  };
+
+  architect.vhost.${frontendDomain} = {
     dnsInterfaces = [ "tailscale" ];
 
     locations."/" = {
-      host = "172.17.0.1";
+      host = "127.0.0.1";
       port = frontendPort;
       allowLan = true;
       allowWAN = true;
-      # allow = [ config.architect.networks."tailscale".net ];
       extraConfig = ''
         proxy_read_timeout 600s;
       '';
     };
   };
 
-  architect.vhost."ollama.giugl.io" = {
+  architect.vhost.${backendDomain} = {
     dnsInterfaces = [ "tailscale" ];
 
     locations."/" = {
-      host = "172.17.0.1";
-      port = 11434;
+      host = "127.0.0.1";
+      port = ollamaPort;
       allowLan = true;
       allowWAN = true;
-      # allow = [ config.architect.networks."tailscale".net ];
       extraConfig = ''
         proxy_read_timeout 600s;
       '';
@@ -46,12 +59,11 @@ in
       autoStart = true;
 
       ports = [
-        "172.17.0.1:${toString frontendPort}:${toString backendPort}"
+        "127.0.0.1:${toString frontendPort}:8080"
       ];
 
       environment = {
-        PORT = "${toString backendPort}";
-        OLLAMA_API_BASE_URL = "http://172.17.0.1:11434/api";
+        OLLAMA_API_BASE_URL = "https://${backendDomain}/api";
       };
 
       extraOptions = [
@@ -61,25 +73,6 @@ in
         "/var/lib/ollama-webui:/app/backend/data"
       ];
     };
-
-    ollama = {
-      image = "ollama/ollama:latest";
-      autoStart = true;
-      extraOptions = [
-        "--pull=always"
-        "--gpus=all"
-      ];
-      environment = {
-        OLLAMA_ORIGINS = "*";
-      };
-      volumes = [
-        "/ollama:/root/.ollama"
-      ];
-      ports = [
-        "127.0.0.1:11434:11434"
-        "172.17.0.1:11434:11434"
-      ];
-    };
   };
 };
 }