add litellm

2026-03-16 09:53:27 -04:00
parent eb67191706
commit 25d3a7805c
6 changed files with 374 additions and 0 deletions
--- a/active/container_litellm/config.yaml
+++ b/active/container_litellm/config.yaml
@@ -0,0 +1,67 @@
+# LiteLLM module-level settings (request_timeout is a litellm_settings key)
+
+litellm_settings:
+  request_timeout: 600  # seconds allowed per upstream LLM call before timing out
+
+# Models
+model_list:
+  # Qwen3.5-35B variants
+  - model_name: qwen3.5-35b-think-general  # thinking on, temperature 1.0
+    litellm_params:
+      api_base: https://llama-cpp.reeselink.com
+      api_key: 'none'  # the literal string "none", not a YAML null
+      model: openai/qwen3.5-35b-a3b
+      presence_penalty: 1.5
+      temperature: 1.0
+      top_p: 0.95
+      extra_body:
+        chat_template_kwargs:
+          enable_thinking: true
+        min_p: 0.0
+        repetition_penalty: 1.0  # NOTE(review): llama.cpp spells this repeat_penalty - verify
+        top_k: 20
+
+  - model_name: qwen3.5-35b-think-code  # thinking on, temperature 0.6
+    litellm_params:
+      api_base: https://llama-cpp.reeselink.com
+      api_key: 'none'  # the literal string "none", not a YAML null
+      model: openai/qwen3.5-35b-a3b
+      presence_penalty: 0.0  # presence penalty disabled for this variant
+      temperature: 0.6
+      top_p: 0.95
+      extra_body:
+        chat_template_kwargs:
+          enable_thinking: true
+        min_p: 0.0
+        repetition_penalty: 1.0  # NOTE(review): llama.cpp spells this repeat_penalty - verify
+        top_k: 20
+
+  - model_name: qwen3.5-35b-instruct-general  # thinking off, temperature 0.7
+    litellm_params:
+      api_base: https://llama-cpp.reeselink.com
+      api_key: 'none'  # the literal string "none", not a YAML null
+      model: openai/qwen3.5-35b-a3b
+      presence_penalty: 1.5
+      temperature: 0.7
+      top_p: 0.8
+      extra_body:
+        chat_template_kwargs:
+          enable_thinking: false
+        min_p: 0.0
+        repetition_penalty: 1.0  # NOTE(review): llama.cpp spells this repeat_penalty - verify
+        top_k: 20
+
+  - model_name: qwen3.5-35b-instruct-reasoning  # thinking off, temperature 1.0
+    litellm_params:
+      api_base: https://llama-cpp.reeselink.com
+      api_key: 'none'  # the literal string "none", not a YAML null
+      model: openai/qwen3.5-35b-a3b
+      presence_penalty: 1.5
+      temperature: 1.0
+      top_p: 0.95
+      extra_body:
+        chat_template_kwargs:
+          enable_thinking: false  # template-level thinking stays off despite the "reasoning" name
+        min_p: 0.0
+        repetition_penalty: 1.0  # NOTE(review): llama.cpp spells this repeat_penalty - verify
+        top_k: 20