tweaked avante, removed example, added generate_overview.bash

Nicole Dresselhaus
2025-07-01 20:34:13 +02:00
parent ace703ea1d
commit 41e1067647
5 changed files with 81 additions and 227 deletions


@@ -9,26 +9,18 @@ return {
     behaviour = {
       enable_cursor_planning_mode = true, -- enable cursor planning mode!
     },
-    ollama = {
-      endpoint = "http://gpu.dighist.geschichte.hu-berlin.de:11434",
-      model = "cogito:14b", -- your desired model (or use gpt-4o, etc.)
-      timeout = 30000, -- Timeout in milliseconds, increase this for reasoning models
-      temperature = 0,
-      max_completion_tokens = 8192, -- Increase this to include reasoning tokens (for reasoning models)
-      stream = true,
-      thinking = true,
-      system_prompt = "Enable deep thinking subroutine.",
-      -- reasoning_effort = "high", -- low|medium|high, only used for reasoning models
-    },
-    rag_service = {
-      enabled = true, -- Enables the RAG service
-      host_mount = os.getenv("HOME"), -- Host mount path for the rag service
-      provider = "ollama", -- The provider to use for RAG service (e.g. openai or ollama)
-      llm_model = "qwen3:32b", -- The LLM model to use for RAG service
-      embed_model = "nomic-embed-text", -- The embedding model to use for RAG service
-      endpoint = "http://gpu.dighist.geschichte.hu-berlin.de:11434", -- The API endpoint for RAG service
-    },
-    vendors = {
+    providers = {
+      ollama = {
+        endpoint = "http://gpu.dighist.geschichte.hu-berlin.de:11434",
+        model = "cogito:32b", -- your desired model (or use gpt-4o, etc.)
+        timeout = 30000, -- Timeout in milliseconds, increase this for reasoning models
+        temperature = 0,
+        max_completion_tokens = 40000, -- Increase this to include reasoning tokens (for reasoning models)
+        stream = true,
+        thinking = true,
+        --system_prompt = "Enable deep thinking subroutine.",
+        -- reasoning_effort = "high", -- low|medium|high, only used for reasoning models
+      },
       deepthink = {
         __inherited_from = "ollama",
         model = "qwen3:32b",
@@ -36,6 +28,14 @@ return {
         reasoning_effort = "high",
       },
     },
+    rag_service = {
+      enabled = true, -- Enables the RAG service
+      host_mount = os.getenv("HOME"), -- Host mount path for the rag service
+      provider = "ollama", -- The provider to use for RAG service (e.g. openai or ollama)
+      llm_model = "cogito", -- The LLM model to use for RAG service
+      embed_model = "nomic-embed-text", -- The embedding model to use for RAG service
+      endpoint = "http://gpu.dighist.geschichte.hu-berlin.de:11434", -- The API endpoint for RAG service
+    },
   },
   -- if you want to build from source then do `make BUILD_FROM_SOURCE=true`
   build = "make",