from flask import Flask, request, jsonify, render_template_string from datetime import datetime import json import os import requests import traceback app = Flask(__name__) # Store recent requests in memory recent_requests = [] MAX_REQUESTS = 50 # Configuration with better defaults OLLAMA_ENDPOINT = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama2") # More common default VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "skip") # Skip videos by default for debugging def test_ollama_connection(): """Test connection to Ollama and get available models""" try: # Try to get model list response = requests.get(f"{OLLAMA_ENDPOINT}/api/tags", timeout=5) if response.status_code == 200: models = response.json().get("models", []) model_names = [m.get("name", "") for m in models] print(f"✅ Connected to Ollama at {OLLAMA_ENDPOINT}") print(f"Available models: {model_names}") return True, model_names else: print(f"❌ Ollama responded with {response.status_code}: {response.text}") return False, [] except Exception as e: print(f"❌ Cannot connect to Ollama at {OLLAMA_ENDPOINT}: {e}") return False, [] def convert_gemini_to_ollama_generate(gemini_request): """Convert Gemini format to Ollama /api/generate format""" try: contents = gemini_request.get("contents", []) # Extract text parts and combine prompt_parts = [] images = [] for content in contents: parts = content.get("parts", []) for part in parts: if "text" in part: prompt_parts.append(part["text"]) elif "inline_data" in part: inline = part["inline_data"] mime_type = inline.get("mime_type", "") data = inline.get("data", "") if VIDEO_FORMAT == "skip" and ( mime_type.startswith("video/") or mime_type.startswith("image/") ): prompt_parts.append( f"[Media content ({mime_type}) was present but skipped for debugging]" ) print(f"⏭️ Skipping media: {mime_type}") elif mime_type.startswith("image/"): images.append(data) print(f"🖼️ Adding image: {mime_type}") # Build Ollama request ollama_request = { "model": OLLAMA_MODEL, "prompt": " ".join(prompt_parts) if prompt_parts else "Hello", "stream": False, "options": {}, } # Add images if present and not skipping if images and VIDEO_FORMAT != "skip": ollama_request["images"] = images # Add generation config gen_config = gemini_request.get("generationConfig", {}) if "temperature" in gen_config: ollama_request["options"]["temperature"] = gen_config["temperature"] if "maxOutputTokens" in gen_config: ollama_request["options"]["num_predict"] = gen_config["maxOutputTokens"] return ollama_request except Exception as e: print(f"❌ Error converting to Ollama format: {e}") raise def convert_ollama_generate_to_gemini(ollama_response): """Convert Ollama /api/generate response to Gemini format""" try: print( f"🔄 Converting Ollama response: {json.dumps(ollama_response, indent=2)[:500]}..." ) # Ollama /api/generate returns: {"response": "text", "done": true, ...} response_text = ollama_response.get("response", "") if not response_text: print(f"❌ No 'response' field in Ollama response") print(f"Full response keys: {list(ollama_response.keys())}") return {"error": "No response text from Ollama"} print(f"✅ Found response text: {response_text[:100]}...") # Convert to Gemini format gemini_response = { "candidates": [ { "content": {"parts": [{"text": response_text}], "role": "model"}, "finishReason": "STOP", "index": 0, } ], "usageMetadata": { "promptTokenCount": ollama_response.get("prompt_eval_count", 0), "candidatesTokenCount": ollama_response.get("eval_count", 0), "totalTokenCount": ollama_response.get("prompt_eval_count", 0) + ollama_response.get("eval_count", 0), }, } return gemini_response except Exception as e: print(f"❌ Error converting Ollama response: {e}") print(f"Ollama response was: {ollama_response}") traceback.print_exc() return {"error": f"Conversion error: {str(e)}"} HTML_TEMPLATE = """ Ollama Proxy Debug

🦙 Ollama Proxy Debug Interface

Configuration:
Ollama Endpoint: {{ endpoint }}
Model: {{ model }}
Video Format: {{ video_format }}
Ollama Status: {{ status_message }}
{% if models %} Available Models: {{ models|join(', ') }} {% endif %}

Send requests to: http://localhost:5000/webhook

{% for req in requests %}
{{ req.method }} {{ req.timestamp }} {% if req.forwarded %} SENT TO OLLAMA {% endif %} {% if req.error %} ERROR {% endif %}
Path: {{ req.path }}
{% if req.body %}
Incoming (Gemini Format):
{{ req.body }}
{% endif %} {% if req.ollama_request %}
Sent to Ollama:
{{ req.ollama_request }}
{% endif %} {% if req.ollama_response %}
Ollama Raw Response:
{{ req.ollama_response }}
{% endif %} {% if req.response %}
Final Response (Gemini Format):
{{ req.response }}
{% endif %} {% if req.error %}
Error Details:
{{ req.error }}
{% endif %}
{% endfor %}
""" @app.route("/") def index(): connected, models = test_ollama_connection() status_class = "connected" if connected else "error" status_message = "Connected ✅" if connected else "Cannot connect ❌" return render_template_string( HTML_TEMPLATE, requests=reversed(recent_requests), endpoint=OLLAMA_ENDPOINT, model=OLLAMA_MODEL, video_format=VIDEO_FORMAT, status_class=status_class, status_message=status_message, models=models, ) @app.route("/webhook", methods=["POST", "PUT", "PATCH"], defaults={"subpath": ""}) @app.route("/webhook/", methods=["POST", "PUT", "PATCH"]) def webhook(subpath): """Accept requests and forward to Ollama /api/generate""" timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") full_path = request.full_path if request.query_string else request.path print(f"\n{'='*60}") print(f"[{timestamp}] INCOMING {request.method} {full_path}") print(f"{'='*60}") # Get request data try: gemini_request = request.get_json() if request.is_json else {} body_display = ( json.dumps(gemini_request, indent=2) if gemini_request else "No JSON body" ) except Exception as e: body_display = f"Error parsing JSON: {e}" gemini_request = {} print(f"Request body: {body_display[:300]}...") # Store request info req_info = { "timestamp": timestamp, "method": request.method, "path": full_path, "body": body_display, "forwarded": False, "response": None, "error": None, "ollama_request": None, "ollama_response": None, } try: if gemini_request: print(f"\n🔄 Converting to Ollama format...") # Convert to Ollama format ollama_request = convert_gemini_to_ollama_generate(gemini_request) req_info["ollama_request"] = json.dumps(ollama_request, indent=2) print(f"Ollama request: {json.dumps(ollama_request, indent=2)}") # Send to Ollama print(f"\n📤 Sending to Ollama: {OLLAMA_ENDPOINT}/api/generate") response = requests.post( f"{OLLAMA_ENDPOINT}/api/generate", json=ollama_request, headers={"Content-Type": "application/json"}, timeout=120, ) print(f"📥 Ollama response status: {response.status_code}") if response.status_code == 200: ollama_response = response.json() req_info["ollama_response"] = json.dumps(ollama_response, indent=2) print(f"✅ Ollama responded successfully") # Convert back to Gemini format gemini_response = convert_ollama_generate_to_gemini(ollama_response) if "error" in gemini_response: req_info["error"] = gemini_response["error"] req_info["response"] = json.dumps(gemini_response, indent=2) else: req_info["forwarded"] = True req_info["response"] = json.dumps(gemini_response, indent=2) print(f"✅ Conversion successful") recent_requests.append(req_info) if len(recent_requests) > MAX_REQUESTS: recent_requests.pop(0) return jsonify(gemini_response), 200 else: error_text = response.text error_msg = f"Ollama returned {response.status_code}: {error_text}" print(f"❌ {error_msg}") req_info["error"] = error_msg req_info["ollama_response"] = error_text req_info["forwarded"] = True recent_requests.append(req_info) if len(recent_requests) > MAX_REQUESTS: recent_requests.pop(0) return ( jsonify( { "error": { "message": error_text, "status": response.status_code, } } ), response.status_code, ) else: req_info["error"] = "No JSON body received" recent_requests.append(req_info) if len(recent_requests) > MAX_REQUESTS: recent_requests.pop(0) return ( jsonify({"status": "error", "message": "No JSON body to process"}), 400, ) except Exception as e: error_msg = f"Exception: {str(e)}" print(f"❌ {error_msg}") traceback.print_exc() req_info["error"] = error_msg recent_requests.append(req_info) if len(recent_requests) > MAX_REQUESTS: recent_requests.pop(0) return jsonify({"error": {"message": error_msg}}), 500 @app.route("/clear", methods=["POST"]) def clear(): """Clear all stored requests""" recent_requests.clear() return jsonify({"status": "cleared"}), 200 @app.route("/test") def test_endpoint(): """Test endpoint to verify Ollama connection""" connected, models = test_ollama_connection() return jsonify( {"connected": connected, "endpoint": OLLAMA_ENDPOINT, "models": models} ) if __name__ == "__main__": print("🦙 Ollama Proxy Server Starting...") print(f"🔗 Ollama Endpoint: {OLLAMA_ENDPOINT}") print(f"🤖 Model: {OLLAMA_MODEL}") print(f"📹 Video Format: {VIDEO_FORMAT}") print("📍 Web UI: http://localhost:5000") print("📮 Webhook: http://localhost:5000/webhook") print("🧪 Test: http://localhost:5000/test") # Test connection on startup connected, models = test_ollama_connection() if not connected: print("\n⚠️ WARNING: Cannot connect to Ollama!") print("Please check:") print("1. Is Ollama running? (ollama serve)") print("2. Is it on the right port?") print("3. Set OLLAMA_ENDPOINT env var if different") app.run(host="0.0.0.0", port=5000, debug=True)