from flask import Flask, request, jsonify, render_template_string
from datetime import datetime
import json
import os
import requests
import traceback
app = Flask(__name__)
# Store recent requests in memory
recent_requests = []
MAX_REQUESTS = 50
# Configuration with better defaults
OLLAMA_ENDPOINT = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama2") # More common default
VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "skip")  # "skip" drops ALL media (video and images) for debugging
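# Example invocation with overrides (hypothetical values and script name;
# any non-"skip" VIDEO_FORMAT enables image forwarding):
#
#   OLLAMA_ENDPOINT=http://192.168.1.50:11434 OLLAMA_MODEL=llava VIDEO_FORMAT=images \
#       python ollama_proxy.py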
def test_ollama_connection():
"""Test connection to Ollama and get available models"""
try:
# Try to get model list
response = requests.get(f"{OLLAMA_ENDPOINT}/api/tags", timeout=5)
if response.status_code == 200:
models = response.json().get("models", [])
model_names = [m.get("name", "") for m in models]
print(f"✅ Connected to Ollama at {OLLAMA_ENDPOINT}")
print(f"Available models: {model_names}")
return True, model_names
else:
print(f"❌ Ollama responded with {response.status_code}: {response.text}")
return False, []
except Exception as e:
print(f"❌ Cannot connect to Ollama at {OLLAMA_ENDPOINT}: {e}")
return False, []
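# For reference, Ollama's /api/tags returns JSON shaped roughly like
# {"models": [{"name": "llama2:latest", ...}, ...]}, which is why the
# function above reads the "name" field of each entry.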
def convert_gemini_to_ollama_generate(gemini_request):
"""Convert Gemini format to Ollama /api/generate format"""
try:
contents = gemini_request.get("contents", [])
# Extract text parts and combine
prompt_parts = []
images = []
for content in contents:
parts = content.get("parts", [])
for part in parts:
if "text" in part:
prompt_parts.append(part["text"])
elif "inline_data" in part:
inline = part["inline_data"]
mime_type = inline.get("mime_type", "")
data = inline.get("data", "")
                    if VIDEO_FORMAT == "skip" and (
                        mime_type.startswith("video/") or mime_type.startswith("image/")
                    ):
                        prompt_parts.append(
                            f"[Media content ({mime_type}) was present but skipped for debugging]"
                        )
                        print(f"⏭️ Skipping media: {mime_type}")
                    elif mime_type.startswith("image/"):
                        images.append(data)
                        print(f"🖼️ Adding image: {mime_type}")
                    else:
                        # /api/generate only accepts images, so other media
                        # (e.g. video) is dropped with a note in the prompt.
                        prompt_parts.append(f"[Unsupported media ({mime_type}) was dropped]")
                        print(f"⚠️ Dropping unsupported media: {mime_type}")
# Build Ollama request
ollama_request = {
"model": OLLAMA_MODEL,
"prompt": " ".join(prompt_parts) if prompt_parts else "Hello",
"stream": False,
"options": {},
}
# Add images if present and not skipping
if images and VIDEO_FORMAT != "skip":
ollama_request["images"] = images
# Add generation config
gen_config = gemini_request.get("generationConfig", {})
if "temperature" in gen_config:
ollama_request["options"]["temperature"] = gen_config["temperature"]
if "maxOutputTokens" in gen_config:
ollama_request["options"]["num_predict"] = gen_config["maxOutputTokens"]
return ollama_request
except Exception as e:
print(f"❌ Error converting to Ollama format: {e}")
raise
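# Illustrative mapping (assuming OLLAMA_MODEL="llama2" and VIDEO_FORMAT="skip"):
#
#   {"contents": [{"parts": [{"text": "Hi"},
#                            {"inline_data": {"mime_type": "image/png", "data": "..."}}]}]}
#
# converts to
#
#   {"model": "llama2", "stream": False, "options": {},
#    "prompt": "Hi [Media content (image/png) was present but skipped for debugging]"}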
def convert_ollama_generate_to_gemini(ollama_response):
"""Convert Ollama /api/generate response to Gemini format"""
try:
print(
f"🔄 Converting Ollama response: {json.dumps(ollama_response, indent=2)[:500]}..."
)
# Ollama /api/generate returns: {"response": "text", "done": true, ...}
response_text = ollama_response.get("response", "")
if not response_text:
            print("❌ No 'response' field in Ollama response")
print(f"Full response keys: {list(ollama_response.keys())}")
return {"error": "No response text from Ollama"}
print(f"✅ Found response text: {response_text[:100]}...")
# Convert to Gemini format
gemini_response = {
"candidates": [
{
"content": {"parts": [{"text": response_text}], "role": "model"},
"finishReason": "STOP",
"index": 0,
}
],
"usageMetadata": {
"promptTokenCount": ollama_response.get("prompt_eval_count", 0),
"candidatesTokenCount": ollama_response.get("eval_count", 0),
"totalTokenCount": ollama_response.get("prompt_eval_count", 0)
+ ollama_response.get("eval_count", 0),
},
}
return gemini_response
except Exception as e:
print(f"❌ Error converting Ollama response: {e}")
print(f"Ollama response was: {ollama_response}")
traceback.print_exc()
return {"error": f"Conversion error: {str(e)}"}
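# Illustrative mapping: an Ollama reply like
#   {"response": "Hello!", "done": True, "prompt_eval_count": 12, "eval_count": 5}
# becomes a Gemini-style body with one STOP-finished candidate containing
# "Hello!" and usageMetadata of 12 prompt / 5 candidate / 17 total tokens.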
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head><title>Ollama Proxy Debug</title></head>
<body>
    <h1>🦙 Ollama Proxy Debug Interface</h1>
    <h2>Configuration:</h2>
    <ul>
        <li>Ollama Endpoint: {{ endpoint }}</li>
        <li>Model: {{ model }}</li>
        <li>Video Format: {{ video_format }}</li>
        <li>Ollama Status: <span class="{{ status_class }}">{{ status_message }}</span></li>
        {% if models %}
        <li>Available Models: {{ models|join(', ') }}</li>
        {% endif %}
    </ul>
    <p>Send requests to: <code>http://localhost:5000/webhook</code></p>
    {% for req in requests %}
    <div class="request">
        <strong>{{ req.method }}</strong>
        <span>{{ req.timestamp }}</span>
        {% if req.forwarded %}<span class="badge">SENT TO OLLAMA</span>{% endif %}
        {% if req.error %}<span class="badge error">ERROR</span>{% endif %}
        <p>Path: {{ req.path }}</p>
        {% if req.body %}
        <h4>Incoming (Gemini Format):</h4>
        <pre>{{ req.body }}</pre>
        {% endif %}
        {% if req.ollama_request %}
        <h4>Sent to Ollama:</h4>
        <pre>{{ req.ollama_request }}</pre>
        {% endif %}
        {% if req.ollama_response %}
        <h4>Ollama Raw Response:</h4>
        <pre>{{ req.ollama_response }}</pre>
        {% endif %}
        {% if req.response %}
        <h4>Final Response (Gemini Format):</h4>
        <pre>{{ req.response }}</pre>
        {% endif %}
        {% if req.error %}
        <h4>Error Details:</h4>
        <pre>{{ req.error }}</pre>
        {% endif %}
    </div>
    {% endfor %}
</body>
</html>
"""
@app.route("/")
def index():
connected, models = test_ollama_connection()
status_class = "connected" if connected else "error"
status_message = "Connected ✅" if connected else "Cannot connect ❌"
return render_template_string(
HTML_TEMPLATE,
requests=reversed(recent_requests),
endpoint=OLLAMA_ENDPOINT,
model=OLLAMA_MODEL,
video_format=VIDEO_FORMAT,
status_class=status_class,
status_message=status_message,
models=models,
)
@app.route("/webhook", methods=["POST", "PUT", "PATCH"], defaults={"subpath": ""})
@app.route("/webhook/<path:subpath>", methods=["POST", "PUT", "PATCH"])
def webhook(subpath):
"""Accept requests and forward to Ollama /api/generate"""
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
full_path = request.full_path if request.query_string else request.path
print(f"\n{'='*60}")
print(f"[{timestamp}] INCOMING {request.method} {full_path}")
print(f"{'='*60}")
# Get request data
try:
gemini_request = request.get_json() if request.is_json else {}
body_display = (
json.dumps(gemini_request, indent=2) if gemini_request else "No JSON body"
)
except Exception as e:
body_display = f"Error parsing JSON: {e}"
gemini_request = {}
print(f"Request body: {body_display[:300]}...")
# Store request info
req_info = {
"timestamp": timestamp,
"method": request.method,
"path": full_path,
"body": body_display,
"forwarded": False,
"response": None,
"error": None,
"ollama_request": None,
"ollama_response": None,
}
try:
if gemini_request:
            print("\n🔄 Converting to Ollama format...")
# Convert to Ollama format
ollama_request = convert_gemini_to_ollama_generate(gemini_request)
req_info["ollama_request"] = json.dumps(ollama_request, indent=2)
print(f"Ollama request: {json.dumps(ollama_request, indent=2)}")
# Send to Ollama
print(f"\n📤 Sending to Ollama: {OLLAMA_ENDPOINT}/api/generate")
response = requests.post(
f"{OLLAMA_ENDPOINT}/api/generate",
json=ollama_request,
headers={"Content-Type": "application/json"},
timeout=120,
)
print(f"📥 Ollama response status: {response.status_code}")
if response.status_code == 200:
ollama_response = response.json()
req_info["ollama_response"] = json.dumps(ollama_response, indent=2)
                print("✅ Ollama responded successfully")
# Convert back to Gemini format
gemini_response = convert_ollama_generate_to_gemini(ollama_response)
                if "error" in gemini_response:
                    req_info["error"] = gemini_response["error"]
                    status_code = 502  # conversion failed; surface it as an error
                else:
                    req_info["forwarded"] = True
                    print("✅ Conversion successful")
                    status_code = 200
                req_info["response"] = json.dumps(gemini_response, indent=2)
                recent_requests.append(req_info)
                if len(recent_requests) > MAX_REQUESTS:
                    recent_requests.pop(0)
                return jsonify(gemini_response), status_code
else:
error_text = response.text
error_msg = f"Ollama returned {response.status_code}: {error_text}"
print(f"❌ {error_msg}")
req_info["error"] = error_msg
req_info["ollama_response"] = error_text
req_info["forwarded"] = True
recent_requests.append(req_info)
if len(recent_requests) > MAX_REQUESTS:
recent_requests.pop(0)
return (
jsonify(
{
"error": {
"message": error_text,
"status": response.status_code,
}
}
),
response.status_code,
)
else:
req_info["error"] = "No JSON body received"
recent_requests.append(req_info)
if len(recent_requests) > MAX_REQUESTS:
recent_requests.pop(0)
return (
jsonify({"status": "error", "message": "No JSON body to process"}),
400,
)
except Exception as e:
error_msg = f"Exception: {str(e)}"
print(f"❌ {error_msg}")
traceback.print_exc()
req_info["error"] = error_msg
recent_requests.append(req_info)
if len(recent_requests) > MAX_REQUESTS:
recent_requests.pop(0)
return jsonify({"error": {"message": error_msg}}), 500
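# Quick smoke test from a shell (hypothetical prompt; any Gemini-style body works):
#
#   curl -X POST http://localhost:5000/webhook \
#        -H "Content-Type: application/json" \
#        -d '{"contents": [{"parts": [{"text": "Say hello"}]}]}'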
@app.route("/clear", methods=["POST"])
def clear():
"""Clear all stored requests"""
recent_requests.clear()
return jsonify({"status": "cleared"}), 200
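# Reset the debug history, e.g.: curl -X POST http://localhost:5000/clear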
@app.route("/test")
def test_endpoint():
"""Test endpoint to verify Ollama connection"""
connected, models = test_ollama_connection()
return jsonify(
{"connected": connected, "endpoint": OLLAMA_ENDPOINT, "models": models}
)
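# Connectivity check, e.g.: curl http://localhost:5000/test
# -> {"connected": true, "endpoint": "...", "models": [...]}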
if __name__ == "__main__":
print("🦙 Ollama Proxy Server Starting...")
print(f"🔗 Ollama Endpoint: {OLLAMA_ENDPOINT}")
print(f"🤖 Model: {OLLAMA_MODEL}")
print(f"📹 Video Format: {VIDEO_FORMAT}")
print("📍 Web UI: http://localhost:5000")
print("📮 Webhook: http://localhost:5000/webhook")
print("🧪 Test: http://localhost:5000/test")
# Test connection on startup
connected, models = test_ollama_connection()
if not connected:
print("\n⚠️ WARNING: Cannot connect to Ollama!")
print("Please check:")
print("1. Is Ollama running? (ollama serve)")
print("2. Is it on the right port?")
print("3. Set OLLAMA_ENDPOINT env var if different")
app.run(host="0.0.0.0", port=5000, debug=True)