# homelab/active/container_litellm/config.yaml

# LiteLLM library-level settings

litellm_settings:
  # Timeout for LLM requests, in seconds (600 s = 10 minutes).
  # LiteLLM documents `request_timeout` as a `litellm_settings` key; it is
  # not a documented `general_settings` key, so it is declared here to make
  # sure the proxy actually applies it.
  # NOTE(review): if `litellm_settings` is already defined elsewhere in this
  # file, merge this key into that mapping instead of duplicating the key.
  request_timeout: 600

# Models
model_list:
  # Qwen3.5-35B variants
  # Thinking enabled, general-purpose sampling preset.
  # NOTE(review): sampling values look like the Qwen model card's
  # recommendation for this mode — confirm against the card.
  - model_name: qwen3.5-35b-think-general
    litellm_params:
      # `openai/` prefix: the backend is called as an OpenAI-compatible API.
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      # Placeholder string; quoted so it is never mistaken for a YAML null.
      api_key: 'none'
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      # Backend-specific fields that are not part of the OpenAI schema.
      extra_body:
        top_k: 20
        min_p: 0.0
        # llama.cpp's server names this sampler `repeat_penalty`;
        # `repetition_penalty` (the vLLM/SGLang spelling) is not one of its
        # parameters and would be silently ignored.
        # NOTE(review): assumes the backend is llama.cpp, per the api_base
        # hostname — confirm.
        repeat_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: true

  # Thinking enabled, coding preset: lower temperature and no presence
  # penalty compared with the general thinking variant.
  # NOTE(review): sampling values look like the Qwen model card's
  # recommendation for this mode — confirm against the card.
  - model_name: qwen3.5-35b-think-code
    litellm_params:
      # `openai/` prefix: the backend is called as an OpenAI-compatible API.
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      # Placeholder string; quoted so it is never mistaken for a YAML null.
      api_key: 'none'
      temperature: 0.6
      top_p: 0.95
      presence_penalty: 0.0
      # Backend-specific fields that are not part of the OpenAI schema.
      extra_body:
        top_k: 20
        min_p: 0.0
        # llama.cpp's server names this sampler `repeat_penalty`;
        # `repetition_penalty` (the vLLM/SGLang spelling) is not one of its
        # parameters and would be silently ignored.
        # NOTE(review): assumes the backend is llama.cpp, per the api_base
        # hostname — confirm.
        repeat_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: true

  # Thinking disabled (instruct mode), general-purpose sampling preset.
  # NOTE(review): sampling values look like the Qwen model card's
  # recommendation for this mode — confirm against the card.
  - model_name: qwen3.5-35b-instruct-general
    litellm_params:
      # `openai/` prefix: the backend is called as an OpenAI-compatible API.
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      # Placeholder string; quoted so it is never mistaken for a YAML null.
      api_key: 'none'
      temperature: 0.7
      top_p: 0.8
      presence_penalty: 1.5
      # Backend-specific fields that are not part of the OpenAI schema.
      extra_body:
        top_k: 20
        min_p: 0.0
        # llama.cpp's server names this sampler `repeat_penalty`;
        # `repetition_penalty` (the vLLM/SGLang spelling) is not one of its
        # parameters and would be silently ignored.
        # NOTE(review): assumes the backend is llama.cpp, per the api_base
        # hostname — confirm.
        repeat_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: false

  # Thinking disabled (instruct mode), reasoning-task sampling preset.
  # NOTE(review): `enable_thinking: false` together with "reasoning" in the
  # name appears intentional (non-thinking mode tuned for reasoning tasks,
  # matching what looks like the Qwen model card preset) — confirm.
  - model_name: qwen3.5-35b-instruct-reasoning
    litellm_params:
      # `openai/` prefix: the backend is called as an OpenAI-compatible API.
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      # Placeholder string; quoted so it is never mistaken for a YAML null.
      api_key: 'none'
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      # Backend-specific fields that are not part of the OpenAI schema.
      extra_body:
        top_k: 20
        min_p: 0.0
        # llama.cpp's server names this sampler `repeat_penalty`;
        # `repetition_penalty` (the vLLM/SGLang spelling) is not one of its
        # parameters and would be silently ignored.
        # NOTE(review): assumes the backend is llama.cpp, per the api_base
        # hostname — confirm.
        repeat_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: false