from flask import Flask, request, jsonify, render_template_string
from datetime import datetime
import json
import os
import requests
import traceback

app = Flask(__name__)

# Store recent requests in memory
recent_requests = []
MAX_REQUESTS = 50

# Configuration, overridable via environment variables
OLLAMA_ENDPOINT = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama2")  # A commonly installed default
VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "skip")  # Skip video/image media by default for debugging
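
# Example override (values illustrative; "proxy.py" stands in for whatever
# filename this script is saved as):
#   OLLAMA_ENDPOINT=http://192.168.1.50:11434 OLLAMA_MODEL=llama3 python proxy.py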

def test_ollama_connection():
    """Test connection to Ollama and get available models"""
    try:
        # Try to get the model list
        response = requests.get(f"{OLLAMA_ENDPOINT}/api/tags", timeout=5)
        if response.status_code == 200:
            models = response.json().get("models", [])
            model_names = [m.get("name", "") for m in models]
            print(f"✅ Connected to Ollama at {OLLAMA_ENDPOINT}")
            print(f"Available models: {model_names}")
            return True, model_names
        else:
            print(f"❌ Ollama responded with {response.status_code}: {response.text}")
            return False, []
    except Exception as e:
        print(f"❌ Cannot connect to Ollama at {OLLAMA_ENDPOINT}: {e}")
        return False, []
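
# For reference, Ollama's GET /api/tags returns a payload roughly shaped like
# (trimmed to the field this proxy actually reads):
#
#   {"models": [{"name": "llama2:latest", ...}, ...]}
#
# so model_names above ends up as e.g. ["llama2:latest"].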

def convert_gemini_to_ollama_generate(gemini_request):
    """Convert Gemini format to Ollama /api/generate format"""
    try:
        contents = gemini_request.get("contents", [])

        # Extract text parts and combine
        prompt_parts = []
        images = []
        for content in contents:
            parts = content.get("parts", [])
            for part in parts:
                if "text" in part:
                    prompt_parts.append(part["text"])
                elif "inline_data" in part:
                    inline = part["inline_data"]
                    mime_type = inline.get("mime_type", "")
                    data = inline.get("data", "")
                    if VIDEO_FORMAT == "skip" and (
                        mime_type.startswith("video/") or mime_type.startswith("image/")
                    ):
                        prompt_parts.append(
                            f"[Media content ({mime_type}) was present but skipped for debugging]"
                        )
                        print(f"⏭️ Skipping media: {mime_type}")
                    elif mime_type.startswith("image/"):
                        images.append(data)
                        print(f"🖼️ Adding image: {mime_type}")
                    # Note: video parts are dropped silently when not skipping,
                    # since /api/generate has no video input field.

        # Build Ollama request
        ollama_request = {
            "model": OLLAMA_MODEL,
            "prompt": " ".join(prompt_parts) if prompt_parts else "Hello",
            "stream": False,
            "options": {},
        }

        # Add images if present and not skipping
        if images and VIDEO_FORMAT != "skip":
            ollama_request["images"] = images

        # Map the Gemini generation config onto Ollama options
        gen_config = gemini_request.get("generationConfig", {})
        if "temperature" in gen_config:
            ollama_request["options"]["temperature"] = gen_config["temperature"]
        if "maxOutputTokens" in gen_config:
            ollama_request["options"]["num_predict"] = gen_config["maxOutputTokens"]

        return ollama_request
    except Exception as e:
        print(f"❌ Error converting to Ollama format: {e}")
        raise
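
# A minimal sketch of the mapping this function performs (payloads are
# illustrative, not taken from a real request). With VIDEO_FORMAT == "skip":
#
#   gemini_in = {
#       "contents": [{"parts": [{"text": "Say hi"}]}],
#       "generationConfig": {"temperature": 0.2, "maxOutputTokens": 64},
#   }
#
# would become:
#
#   {"model": OLLAMA_MODEL, "prompt": "Say hi", "stream": False,
#    "options": {"temperature": 0.2, "num_predict": 64}}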

def convert_ollama_generate_to_gemini(ollama_response):
    """Convert Ollama /api/generate response to Gemini format"""
    try:
        print(
            f"🔄 Converting Ollama response: {json.dumps(ollama_response, indent=2)[:500]}..."
        )

        # Ollama /api/generate returns: {"response": "text", "done": true, ...}
        response_text = ollama_response.get("response", "")
        if not response_text:
            print("❌ No 'response' field in Ollama response")
            print(f"Full response keys: {list(ollama_response.keys())}")
            return {"error": "No response text from Ollama"}

        print(f"✅ Found response text: {response_text[:100]}...")

        # Convert to Gemini format
        gemini_response = {
            "candidates": [
                {
                    "content": {"parts": [{"text": response_text}], "role": "model"},
                    "finishReason": "STOP",
                    "index": 0,
                }
            ],
            "usageMetadata": {
                "promptTokenCount": ollama_response.get("prompt_eval_count", 0),
                "candidatesTokenCount": ollama_response.get("eval_count", 0),
                "totalTokenCount": ollama_response.get("prompt_eval_count", 0)
                + ollama_response.get("eval_count", 0),
            },
        }
        return gemini_response
    except Exception as e:
        print(f"❌ Error converting Ollama response: {e}")
        print(f"Ollama response was: {ollama_response}")
        traceback.print_exc()
        return {"error": f"Conversion error: {str(e)}"}

HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>Ollama Proxy Debug</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }
        h1 { color: #333; }
        .config { background: #e3f2fd; padding: 15px; margin: 10px 0; border-radius: 5px; }
        .status { padding: 10px; margin: 10px 0; border-radius: 5px; }
        .status.connected { background: #c8e6c9; }
        .status.error { background: #ffcdd2; }
        .request {
            background: white;
            padding: 15px;
            margin: 10px 0;
            border-radius: 5px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .timestamp { color: #666; font-size: 0.9em; }
        .method {
            display: inline-block;
            padding: 3px 8px;
            background: #4CAF50;
            color: white;
            border-radius: 3px;
            font-weight: bold;
        }
        .forwarded {
            display: inline-block;
            padding: 3px 8px;
            background: #2196F3;
            color: white;
            border-radius: 3px;
            font-size: 0.8em;
            margin-left: 10px;
        }
        .error-badge {
            display: inline-block;
            padding: 3px 8px;
            background: #f44336;
            color: white;
            border-radius: 3px;
            font-size: 0.8em;
            margin-left: 10px;
        }
        pre {
            background: #f4f4f4;
            padding: 10px;
            border-radius: 3px;
            overflow-x: auto;
            max-height: 300px;
            overflow-y: auto;
        }
        .clear-btn {
            background: #f44336;
            color: white;
            border: none;
            padding: 10px 20px;
            border-radius: 5px;
            cursor: pointer;
            margin: 10px 0;
        }
        .clear-btn:hover { background: #d32f2f; }
    </style>
    <script>
        function clearRequests() {
            fetch('/clear', { method: 'POST' })
                .then(() => location.reload());
        }
        // Auto-refresh every 5 seconds
        setTimeout(() => location.reload(), 5000);
    </script>
</head>
<body>
    <h1>🦙 Ollama Proxy Debug Interface</h1>

    <div class="config">
        <strong>Configuration:</strong><br>
        Ollama Endpoint: <strong>{{ endpoint }}</strong><br>
        Model: <strong>{{ model }}</strong><br>
        Video Format: <strong>{{ video_format }}</strong>
    </div>

    <div class="status {{ status_class }}">
        <strong>Ollama Status:</strong> {{ status_message }}<br>
        {% if models %}
        <strong>Available Models:</strong> {{ models|join(', ') }}
        {% endif %}
    </div>

    <p><strong>Send requests to:</strong> http://localhost:5000/webhook</p>
    <button class="clear-btn" onclick="clearRequests()">Clear All</button>

    <div id="requests">
        {% for req in requests %}
        <div class="request">
            <div>
                <span class="method">{{ req.method }}</span>
                <span class="timestamp">{{ req.timestamp }}</span>
                {% if req.forwarded %}
                <span class="forwarded">SENT TO OLLAMA</span>
                {% endif %}
                {% if req.error %}
                <span class="error-badge">ERROR</span>
                {% endif %}
            </div>
            <div><strong>Path:</strong> {{ req.path }}</div>
            {% if req.body %}
            <div><strong>Incoming (Gemini Format):</strong></div>
            <pre>{{ req.body }}</pre>
            {% endif %}
            {% if req.ollama_request %}
            <div><strong>Sent to Ollama:</strong></div>
            <pre>{{ req.ollama_request }}</pre>
            {% endif %}
            {% if req.ollama_response %}
            <div><strong>Ollama Raw Response:</strong></div>
            <pre>{{ req.ollama_response }}</pre>
            {% endif %}
            {% if req.response %}
            <div><strong>Final Response (Gemini Format):</strong></div>
            <pre>{{ req.response }}</pre>
            {% endif %}
            {% if req.error %}
            <div><strong>Error Details:</strong></div>
            <pre style="color: red;">{{ req.error }}</pre>
            {% endif %}
        </div>
        {% endfor %}
    </div>
</body>
</html>
"""
- @app.route("/")
- def index():
- connected, models = test_ollama_connection()
- status_class = "connected" if connected else "error"
- status_message = "Connected ✅" if connected else "Cannot connect ❌"
- return render_template_string(
- HTML_TEMPLATE,
- requests=reversed(recent_requests),
- endpoint=OLLAMA_ENDPOINT,
- model=OLLAMA_MODEL,
- video_format=VIDEO_FORMAT,
- status_class=status_class,
- status_message=status_message,
- models=models,
- )
- @app.route("/webhook", methods=["POST", "PUT", "PATCH"], defaults={"subpath": ""})
- @app.route("/webhook/<path:subpath>", methods=["POST", "PUT", "PATCH"])
- def webhook(subpath):
- """Accept requests and forward to Ollama /api/generate"""
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- full_path = request.full_path if request.query_string else request.path
- print(f"\n{'='*60}")
- print(f"[{timestamp}] INCOMING {request.method} {full_path}")
- print(f"{'='*60}")
- # Get request data
- try:
- gemini_request = request.get_json() if request.is_json else {}
- body_display = (
- json.dumps(gemini_request, indent=2) if gemini_request else "No JSON body"
- )
- except Exception as e:
- body_display = f"Error parsing JSON: {e}"
- gemini_request = {}
- print(f"Request body: {body_display[:300]}...")
- # Store request info
- req_info = {
- "timestamp": timestamp,
- "method": request.method,
- "path": full_path,
- "body": body_display,
- "forwarded": False,
- "response": None,
- "error": None,
- "ollama_request": None,
- "ollama_response": None,
- }
- try:
- if gemini_request:
- print(f"\n🔄 Converting to Ollama format...")
- # Convert to Ollama format
- ollama_request = convert_gemini_to_ollama_generate(gemini_request)
- req_info["ollama_request"] = json.dumps(ollama_request, indent=2)
- print(f"Ollama request: {json.dumps(ollama_request, indent=2)}")
- # Send to Ollama
- print(f"\n📤 Sending to Ollama: {OLLAMA_ENDPOINT}/api/generate")
- response = requests.post(
- f"{OLLAMA_ENDPOINT}/api/generate",
- json=ollama_request,
- headers={"Content-Type": "application/json"},
- timeout=120,
- )
- print(f"📥 Ollama response status: {response.status_code}")
- if response.status_code == 200:
- ollama_response = response.json()
- req_info["ollama_response"] = json.dumps(ollama_response, indent=2)
- print(f"✅ Ollama responded successfully")
- # Convert back to Gemini format
- gemini_response = convert_ollama_generate_to_gemini(ollama_response)
- if "error" in gemini_response:
- req_info["error"] = gemini_response["error"]
- req_info["response"] = json.dumps(gemini_response, indent=2)
- else:
- req_info["forwarded"] = True
- req_info["response"] = json.dumps(gemini_response, indent=2)
- print(f"✅ Conversion successful")
- recent_requests.append(req_info)
- if len(recent_requests) > MAX_REQUESTS:
- recent_requests.pop(0)
- return jsonify(gemini_response), 200
- else:
- error_text = response.text
- error_msg = f"Ollama returned {response.status_code}: {error_text}"
- print(f"❌ {error_msg}")
- req_info["error"] = error_msg
- req_info["ollama_response"] = error_text
- req_info["forwarded"] = True
- recent_requests.append(req_info)
- if len(recent_requests) > MAX_REQUESTS:
- recent_requests.pop(0)
- return (
- jsonify(
- {
- "error": {
- "message": error_text,
- "status": response.status_code,
- }
- }
- ),
- response.status_code,
- )
- else:
- req_info["error"] = "No JSON body received"
- recent_requests.append(req_info)
- if len(recent_requests) > MAX_REQUESTS:
- recent_requests.pop(0)
- return (
- jsonify({"status": "error", "message": "No JSON body to process"}),
- 400,
- )
- except Exception as e:
- error_msg = f"Exception: {str(e)}"
- print(f"❌ {error_msg}")
- traceback.print_exc()
- req_info["error"] = error_msg
- recent_requests.append(req_info)
- if len(recent_requests) > MAX_REQUESTS:
- recent_requests.pop(0)
- return jsonify({"error": {"message": error_msg}}), 500
- @app.route("/clear", methods=["POST"])
- def clear():
- """Clear all stored requests"""
- recent_requests.clear()
- return jsonify({"status": "cleared"}), 200
- @app.route("/test")
- def test_endpoint():
- """Test endpoint to verify Ollama connection"""
- connected, models = test_ollama_connection()
- return jsonify(
- {"connected": connected, "endpoint": OLLAMA_ENDPOINT, "models": models}
- )
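
# Example of exercising the proxy by hand (prompt text is hypothetical;
# adjust the port if you change app.run below):
#
#   curl -s http://localhost:5000/webhook \
#     -H "Content-Type: application/json" \
#     -d '{"contents": [{"parts": [{"text": "Say hi"}]}]}'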

if __name__ == "__main__":
    print("🦙 Ollama Proxy Server Starting...")
    print(f"🔗 Ollama Endpoint: {OLLAMA_ENDPOINT}")
    print(f"🤖 Model: {OLLAMA_MODEL}")
    print(f"📹 Video Format: {VIDEO_FORMAT}")
    print("📍 Web UI: http://localhost:5000")
    print("📮 Webhook: http://localhost:5000/webhook")
    print("🧪 Test: http://localhost:5000/test")

    # Test connection on startup
    connected, models = test_ollama_connection()
    if not connected:
        print("\n⚠️ WARNING: Cannot connect to Ollama!")
        print("Please check:")
        print("1. Is Ollama running? (ollama serve)")
        print("2. Is it on the right port?")
        print("3. Set OLLAMA_ENDPOINT env var if different")

    app.run(host="0.0.0.0", port=5000, debug=True)