-- Extract the generated text from Ollama's /api/generate JSON payload;
-- reused below for both the streamed and non-streamed response handlers.
local function get_text_fn(json)
  return json.response
end
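-- A minimal sketch of the payload shape this helper assumes (Ollama's
-- /api/generate schema): each reply, whether a streamed chunk or a full
-- response, carries the generated text in its `response` field, e.g.
--   { model = "granite3.3-fim:8b", response = "return n - 1", done = false }
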
return {
  {
    "milanglacier/minuet-ai.nvim",
    config = function()
      require("minuet").setup({
        virtualtext = {
          auto_trigger_ft = {}, -- see the note after this block
          keymap = {
            -- accept the whole completion
            accept = "<A-A>",
            -- accept one line
            accept_line = "<A-a>",
            -- accept n lines (prompts for a number);
            -- e.g. <A-z>2<CR> accepts 2 lines
            accept_n_lines = "<A-z>",
            -- cycle to the previous completion item, or manually invoke completion
            prev = "<A-[>",
            -- cycle to the next completion item, or manually invoke completion
            next = "<A-]>",
            dismiss = "<A-e>",
          },
        },
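        -- Illustrative tweak (not part of the original config): list
        -- filetypes in auto_trigger_ft above to get ghost-text suggestions
        -- without a manual keypress, e.g. auto_trigger_ft = { "lua", "python" }.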
provider = "openai_fim_compatible",
-- the maximum total characters of the context before and after the cursor
-- 16000 characters typically equate to approximately 4,000 tokens for
-- LLMs.
context_window = 4000,
-- when the total characters exceed the context window, the ratio of
-- context before cursor and after cursor, the larger the ratio the more
-- context before cursor will be used. This option should be between 0 and
-- 1, context_ratio = 0.75 means the ratio will be 3:1.
context_ratio = 0.75,
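        -- Worked example under the settings above: once the surrounding
        -- context exceeds 4000 characters, up to 4000 * 0.75 = 3000
        -- characters before the cursor and 4000 * 0.25 = 1000 after it
        -- are sent.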
        throttle = 0, -- send at most one request every x milliseconds; 0 disables throttling
        -- debounce requests by x milliseconds; 0 disables debouncing
        debounce = 100,
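        -- e.g. with debounce = 100, a request only fires once the cursor has
        -- been idle for 100 ms, so rapid typing does not flood the backend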
        -- control notification display for request status:
        -- false: disable all notifications (the boolean false, not the string "false")
        -- "debug": display all notifications (comprehensive debugging)
        -- "verbose": display most notifications
        -- "warn": display warnings and errors only
        -- "error": display errors only
        notify = "warn",
        -- the request timeout, measured in seconds. When streaming is
        -- enabled (stream = true), a shorter request_timeout allows faster
        -- retrieval of completion items, albeit potentially incomplete ones.
        -- Conversely, with streaming disabled (stream = false), a timeout
        -- occurring before the LLM returns results yields no completion
        -- items at all.
        request_timeout = 10,
        -- if a completion item has multiple lines, create an additional
        -- completion item containing only its first line. This option only
        -- affects cmp and blink; for virtualtext, no single-line entry is
        -- added.
        add_single_line_entry = true,
        -- the number of completion items encoded as part of the prompt for a
        -- chat LLM. For a FIM model, this is the number of requests to send.
        -- Note that when add_single_line_entry is true, the actual number of
        -- returned items may exceed this value. Additionally, the LLM cannot
        -- guarantee the exact number of completion items specified, as this
        -- parameter serves only as a prompt guideline.
        n_completions = 3,
        -- length of non-whitespace context after the cursor used to filter
        -- the completion text. Set to 0 to disable filtering.
        --
        -- Example: with after_cursor_filter_length = 3 and context
        --
        --   "def fib(n):\n|\n\nfib(5)" (where | is the cursor position),
        --
        -- if the completion text contains "fib", then "fib" and everything
        -- after it is removed. This filters repeated text generated by the
        -- LLM. A large value (e.g. 15) is recommended to avoid false
        -- positives.
        after_cursor_filter_length = 15,
        -- proxy to route requests through; nil disables proxying
        proxy = nil,
        provider_options = {
          -- only used if provider is switched to "openai_compatible" (chat
          -- completions); see the note after this block
          openai_compatible = {
            -- minuet reads the key from the environment variable named here;
            -- Ollama needs no key, so any always-set variable (TERM) works
            api_key = "TERM",
            name = "Ollama",
            -- chat-completions endpoint (the /api/generate endpoint used by
            -- the FIM provider below is not OpenAI-compatible)
            end_point = "http://gpu.dighist.geschichte.hu-berlin.de:11434/v1/chat/completions",
            model = "granite3.3-fim:8b",
            optional = {
              max_tokens = 512,
              top_p = 0.9,
            },
          },
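          -- To use the chat provider above instead of FIM, the only change
          -- needed is the top-level option (assuming the model handles
          -- chat-style prompts):
          --   provider = "openai_compatible",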
          openai_fim_compatible = {
            -- environment variable holding the API key; see the note above
            api_key = "TERM",
            name = "Ollama",
            end_point = "http://gpu.dighist.geschichte.hu-berlin.de:11434/api/generate",
            model = "granite3.3-fim:8b",
            stream = true,
            optional = {
              max_tokens = 512,
              top_p = 0.9,
            },
            -- Ollama puts the generated text in `response` for both streamed
            -- chunks and non-streamed replies, so one helper covers both
            get_text_fn = {
              no_stream = get_text_fn,
              stream = get_text_fn,
            },
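            -- Assumption about minuet's contract here: with stream = true,
            -- get_text_fn.stream is called once per streamed JSON chunk and
            -- the results are concatenated; no_stream is called once on the
            -- full response body.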
            -- Granite uses explicit FIM tokens, so assemble the whole
            -- fill-in-the-middle string under template.prompt and set
            -- suffix = false. Defining this under `suffix` (as a function)
            -- would leave minuet's default prompt in place and send the FIM
            -- string as the request's suffix parameter instead.
            template = {
              prompt = function(context_before_cursor, context_after_cursor)
                return "<fim_prefix>"
                  .. context_before_cursor
                  .. "<fim_suffix>"
                  .. context_after_cursor
                  .. "<fim_middle>"
              end,
              suffix = false,
            },
          },
        },
      })
    end,
  },
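  -- minuet-ai lists plenary.nvim as a dependency; it is kept as a separate
  -- spec here, though it could equally be declared via the plugin's
  -- `dependencies` field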
{ "nvim-lua/plenary.nvim" },
}