Files
llm_server_monitor/post_llama.py
2026-01-21 22:46:01 -05:00

149 lines
4.9 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Post a message with an attached file (contents embedded in JSON) to an
OpenAI-compatible API and echo the response.
Defaults:
• Server: https://llama-cpp.reeselink.com/v1/chat/completions
• Model : gpt-oss-120b
• No API key required (but can be supplied via --api-key)
"""
import argparse
import json
import sys
from pathlib import Path
import requests
# ----------------------------------------------------------------------
# Helper functions
# ----------------------------------------------------------------------
def load_file_text(path: Path) -> str:
    """Return the file's contents as a string.

    The file is decoded as UTF-8 when possible.  A file that is not valid
    UTF-8 (i.e. binary data) is instead returned as an ASCII base64 string.
    """
    try:
        return path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Binary fallback: represent the raw bytes as base64 text.
        import base64

        encoded = base64.b64encode(path.read_bytes())
        return encoded.decode("ascii")
def build_payload(
    model: str,
    user_message: str,
    file_content: str,
    file_name: str,
) -> dict:
    """
    Build the JSON body for an OpenAI chat-completions request.

    The "file attachment" is modeled as a second user-role message that
    carries the file name followed by its contents — the same convention
    many front-ends use to embed files in the conversation history.
    """
    # Attachment message: a header line naming the file, then the raw contents.
    attachment = f"[Attached file: {file_name}]\n{file_content}"
    return {
        "model": model,
        "messages": [
            # Primary prompt the model should answer.
            {"role": "user", "content": user_message},
            # The attached file, as a follow-up user message.
            {"role": "user", "content": attachment},
        ],
        # Optional sampling parameters — could be exposed as CLI args later.
        "temperature": 0.7,
        "max_tokens": 1024,
    }
def post_chat_completion(
    endpoint: str,
    payload: dict,
    api_key: str | None = None,
) -> requests.Response:
    """POST *payload* as JSON to *endpoint* and return the raw response.

    A ``Bearer`` Authorization header is attached when *api_key* is supplied.
    Raises ``requests.HTTPError`` for non-2xx status codes.
    """
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    resp = requests.post(endpoint, headers=headers, json=payload, timeout=300)
    # Surface HTTP-level failures to the caller as exceptions.
    resp.raise_for_status()
    return resp
# ----------------------------------------------------------------------
# CLI entry point
# ----------------------------------------------------------------------
def main() -> None:
    """Parse CLI arguments, post the message + file to the API, print the reply.

    Exits with an error message (via ``sys.exit``) when the file is missing
    or the HTTP request fails.
    """
    parser = argparse.ArgumentParser(
        description="Send a message + file contents to an OpenAI-compatible API."
    )
    parser.add_argument(
        "file",
        type=Path,
        help="Path to the file whose contents will be sent in the payload.",
    )
    parser.add_argument(
        "-m",
        "--message",
        default="Please analyse the attached file.",
        help="User message to send alongside the file.",
    )
    parser.add_argument(
        "--model",
        default="gpt-oss-120b",
        help="Model name to request (default: %(default)s).",
    )
    parser.add_argument(
        "--api-key",
        default=None,
        help="Optional API key; omit if the server does not require authentication.",
    )
    parser.add_argument(
        "--url",
        default="https://llama-cpp.reeselink.com/v1/chat/completions",
        help="Base URL of the OpenAI-compatible endpoint (default: %(default)s).",
    )
    args = parser.parse_args()

    # ------------------------------------------------------------------
    # Read file and build request
    # ------------------------------------------------------------------
    if not args.file.is_file():
        sys.exit(f"Error: '{args.file}' does not exist or is not a regular file.")
    file_content = load_file_text(args.file)
    payload = build_payload(
        model=args.model,
        user_message=args.message,
        file_content=file_content,
        file_name=args.file.name,
    )

    # ------------------------------------------------------------------
    # Send request
    # ------------------------------------------------------------------
    try:
        resp = post_chat_completion(endpoint=args.url, payload=payload, api_key=args.api_key)
    except requests.RequestException as exc:
        sys.exit(f"Request failed: {exc}")

    # ------------------------------------------------------------------
    # Echo the response
    # ------------------------------------------------------------------
    try:
        json_resp = resp.json()
    except ValueError:
        # Body was not JSON — just dump the raw text.
        print(resp.text)
        return
    try:
        # BUG FIX: the content is already a plain string; passing it through
        # json.dumps() wrapped it in quotes and escaped newlines/Unicode.
        # Print it directly instead.
        print(json_resp["choices"][0]["message"]["content"])
    except (KeyError, IndexError, TypeError):
        # Response JSON lacks the expected choices/message shape (e.g. an
        # error object) — show the whole document pretty-printed.
        print(json.dumps(json_resp, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()