@@ -3,6 +3,7 @@ from datetime import datetime
import json
import os
import requests
+import traceback

app = Flask(__name__)

@@ -10,140 +11,37 @@ app = Flask(__name__)
recent_requests = []
MAX_REQUESTS = 50

-# OpenAI endpoint configuration
-OPENAI_ENDPOINT = os.getenv(
-    "OPENAI_ENDPOINT", "http://localhost:11434/v1/chat/completions"
-)
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "none")
-OPENAI_MODEL = os.getenv("OPENAI_MODEL", "InternVL3_5-14B")
-# Video format: 'openai' (data URL), 'vllm' (try vLLM format), 'skip', or 'error'
-VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "openai")
-
-# NEW: Endpoint type detection
-ENDPOINT_TYPE = os.getenv("ENDPOINT_TYPE", "auto")  # 'openai', 'ollama', or 'auto'
-
-
-def detect_endpoint_type(endpoint_url):
-    """Auto-detect if endpoint is OpenAI-compatible or Ollama native"""
-    if "/v1/chat/completions" in endpoint_url:
-        return "openai"
-    elif "/api/generate" in endpoint_url or "/api/chat" in endpoint_url:
-        return "ollama"
-    elif "localhost:11434" in endpoint_url or "ollama" in endpoint_url.lower():
-        return "openai"  # Assume OpenAI-compatible for Ollama
-    else:
-        return "openai"  # Default to OpenAI format
-
-
-def convert_gemini_to_openai(gemini_request):
-    """Convert Gemini API format to OpenAI API format"""
-    try:
-        contents = gemini_request.get("contents", [])
-        messages = []
-        media_info = {"images": [], "videos": []}
-
-        for content in contents:
-            parts = content.get("parts", [])
-            message_content = []
-
-            for part in parts:
-                # Handle text parts
-                if "text" in part:
-                    message_content.append({"type": "text", "text": part["text"]})
-
-                # Handle inline_data (images/video)
-                elif "inline_data" in part:
-                    inline = part["inline_data"]
-                    mime_type = inline.get("mime_type", "")
-                    data = inline.get("data", "")
-
-                    if mime_type.startswith("image/"):
-                        # Images: Universally supported across all runners
-                        media_info["images"].append(mime_type)
-                        print(f"🖼️ Adding image: {mime_type}")
-                        message_content.append(
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:{mime_type};base64,{data}",
-                                    "detail": "auto",
-                                },
-                            }
-                        )
-
-                    elif mime_type.startswith("video/"):
-                        # Videos: Format depends on VIDEO_FORMAT setting
-                        if VIDEO_FORMAT == "skip":
-                            media_info["videos"].append(f"skipped ({mime_type})")
-                            print(f"⏭️ Skipping video: {mime_type} (VIDEO_FORMAT=skip)")
-                            message_content.append(
-                                {
-                                    "type": "text",
-                                    "text": f"[Video content ({mime_type}) was present but skipped]",
-                                }
-                            )
-
-                        elif VIDEO_FORMAT == "error":
-                            raise ValueError(
-                                f"Video content detected ({mime_type}) but VIDEO_FORMAT=error"
-                            )
-
-                        else:  # 'openai', 'vllm', or any other value
-                            media_info["videos"].append(
-                                f"format: {VIDEO_FORMAT} ({mime_type})"
-                            )
-                            print(
-                                f"📹 Adding video ({VIDEO_FORMAT} format): {mime_type}"
-                            )
-                            message_content.append(
-                                {
-                                    "type": "image_url",
-                                    "image_url": {
-                                        "url": f"data:{mime_type};base64,{data}",
-                                        "detail": "auto",
-                                    },
-                                }
-                            )
-
-            # Add as user message
-            # If only one content item and it's text, send as string for better compatibility
-            if len(message_content) == 1 and message_content[0].get("type") == "text":
-                messages.append({"role": "user", "content": message_content[0]["text"]})
-            else:
-                messages.append({"role": "user", "content": message_content})
+# Configuration with better defaults
+OLLAMA_ENDPOINT = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
+OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama2")  # More common default
+VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "skip")  # Skip videos by default for debugging

-        # Build OpenAI request
-        openai_request = {"model": OPENAI_MODEL, "messages": messages}

-        # Add generation config as OpenAI parameters
-        gen_config = gemini_request.get("generationConfig", {})
-        if "maxOutputTokens" in gen_config:
-            openai_request["max_tokens"] = gen_config["maxOutputTokens"]
-        if "temperature" in gen_config:
-            openai_request["temperature"] = gen_config["temperature"]
-
-        # Log media summary
-        if media_info["images"] or media_info["videos"]:
-            print(f"📊 Media summary:")
-            if media_info["images"]:
-                print(
-                    f" Images: {len(media_info['images'])} ({', '.join(media_info['images'])})"
-                )
-            if media_info["videos"]:
-                print(f" Videos: {', '.join(media_info['videos'])}")
-
-        return openai_request
+def test_ollama_connection():
+    """Test connection to Ollama and get available models"""
+    try:
+        # Try to get model list
+        response = requests.get(f"{OLLAMA_ENDPOINT}/api/tags", timeout=5)
+        if response.status_code == 200:
+            models = response.json().get("models", [])
+            model_names = [m.get("name", "") for m in models]
+            print(f"✅ Connected to Ollama at {OLLAMA_ENDPOINT}")
+            print(f"Available models: {model_names}")
+            return True, model_names
+        else:
+            print(f"❌ Ollama responded with {response.status_code}: {response.text}")
+            return False, []
    except Exception as e:
-        print(f"❌ Error converting request: {e}")
-        raise
+        print(f"❌ Cannot connect to Ollama at {OLLAMA_ENDPOINT}: {e}")
+        return False, []


-def convert_gemini_to_ollama(gemini_request):
-    """Convert Gemini API format to Ollama native format"""
+def convert_gemini_to_ollama_generate(gemini_request):
+    """Convert Gemini format to Ollama /api/generate format"""
    try:
        contents = gemini_request.get("contents", [])

-        # Extract text and combine into a single prompt
+        # Extract text parts and combine
        prompt_parts = []
        images = []

@@ -158,26 +56,35 @@ def convert_gemini_to_ollama(gemini_request):
                    mime_type = inline.get("mime_type", "")
                    data = inline.get("data", "")

-                    if mime_type.startswith("image/") or mime_type.startswith("video/"):
-                        # Ollama expects images in a different format
-                        images.append(data)  # Just the base64 data
-                        print(f"🖼️ Adding media for Ollama: {mime_type}")
+                    if VIDEO_FORMAT == "skip" and (
+                        mime_type.startswith("video/") or mime_type.startswith("image/")
+                    ):
+                        prompt_parts.append(
+                            f"[Media content ({mime_type}) was present but skipped for debugging]"
+                        )
+                        print(f"⏭️ Skipping media: {mime_type}")
+                    elif mime_type.startswith("image/"):
+                        images.append(data)
+                        print(f"🖼️ Adding image: {mime_type}")

        # Build Ollama request
        ollama_request = {
-            "model": OPENAI_MODEL,
-            "prompt": " ".join(prompt_parts),
+            "model": OLLAMA_MODEL,
+            "prompt": " ".join(prompt_parts) if prompt_parts else "Hello",
            "stream": False,
+            "options": {},
        }

-        # Add images if present
-        if images:
+        # Add images if present and not skipping
+        if images and VIDEO_FORMAT != "skip":
            ollama_request["images"] = images

        # Add generation config
        gen_config = gemini_request.get("generationConfig", {})
        if "temperature" in gen_config:
-            ollama_request["options"] = {"temperature": gen_config["temperature"]}
+            ollama_request["options"]["temperature"] = gen_config["temperature"]
+        if "maxOutputTokens" in gen_config:
+            ollama_request["options"]["num_predict"] = gen_config["maxOutputTokens"]

        return ollama_request
    except Exception as e:
@@ -185,59 +92,22 @@ def convert_gemini_to_ollama(gemini_request):
        raise


-def convert_openai_to_gemini(openai_response):
-    """Convert OpenAI API response to Gemini API format"""
+def convert_ollama_generate_to_gemini(ollama_response):
+    """Convert Ollama /api/generate response to Gemini format"""
    try:
-        # Extract the message content
-        choices = openai_response.get("choices", [])
-        if not choices:
-            print(f"❌ No choices in OpenAI response: {openai_response}")
-            return {"error": "No response generated"}
-
-        message = choices[0].get("message", {})
-        content = message.get("content", "")
-
-        if not content:
-            print(f"❌ No content in message: {message}")
-            return {"error": "No response generated"}
-
-        # Convert to Gemini format
-        gemini_response = {
-            "candidates": [
-                {
-                    "content": {"parts": [{"text": content}], "role": "model"},
-                    "finishReason": "STOP",
-                    "index": 0,
-                }
-            ],
-            "usageMetadata": {
-                "promptTokenCount": openai_response.get("usage", {}).get(
-                    "prompt_tokens", 0
-                ),
-                "candidatesTokenCount": openai_response.get("usage", {}).get(
-                    "completion_tokens", 0
-                ),
-                "totalTokenCount": openai_response.get("usage", {}).get(
-                    "total_tokens", 0
-                ),
-            },
-        }
-
-        return gemini_response
-    except Exception as e:
-        print(f"❌ Error converting OpenAI response: {e}")
-        raise
-
+        print(
+            f"🔄 Converting Ollama response: {json.dumps(ollama_response, indent=2)[:500]}..."
+        )

-def convert_ollama_to_gemini(ollama_response):
-    """Convert Ollama native response to Gemini API format"""
-    try:
        # Ollama /api/generate returns: {"response": "text", "done": true, ...}
        response_text = ollama_response.get("response", "")

        if not response_text:
-            print(f"❌ No response text in Ollama response: {ollama_response}")
-            return {"error": "No response generated"}
+            print(f"❌ No 'response' field in Ollama response")
+            print(f"Full response keys: {list(ollama_response.keys())}")
+            return {"error": "No response text from Ollama"}
+
+        print(f"✅ Found response text: {response_text[:100]}...")

        # Convert to Gemini format
        gemini_response = {
@@ -259,18 +129,23 @@ def convert_ollama_to_gemini(ollama_response):
        return gemini_response
    except Exception as e:
        print(f"❌ Error converting Ollama response: {e}")
-        raise
+        print(f"Ollama response was: {ollama_response}")
+        traceback.print_exc()
+        return {"error": f"Conversion error: {str(e)}"}


HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
-    <title>POST Request Monitor</title>
+    <title>Ollama Proxy Debug</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }
        h1 { color: #333; }
-        .config { background: #e3f2fd; padding: 10px; margin: 10px 0; border-radius: 5px; }
+        .config { background: #e3f2fd; padding: 15px; margin: 10px 0; border-radius: 5px; }
+        .status { padding: 10px; margin: 10px 0; border-radius: 5px; }
+        .status.connected { background: #c8e6c9; }
+        .status.error { background: #ffcdd2; }
        .request {
            background: white;
            padding: 15px;
@@ -329,21 +204,30 @@ HTML_TEMPLATE = """
            fetch('/clear', { method: 'POST' })
                .then(() => location.reload());
        }
-        // Auto-refresh every 3 seconds
-        setTimeout(() => location.reload(), 3000);
+        // Auto-refresh every 5 seconds
+        setTimeout(() => location.reload(), 5000);
    </script>
</head>
<body>
-    <h1>📬 POST Request Monitor & AI Proxy</h1>
+    <h1>🦙 Ollama Proxy Debug Interface</h1>
+
    <div class="config">
        <strong>Configuration:</strong><br>
-        Endpoint: <strong>{{ endpoint }}</strong><br>
-        Type: <strong>{{ endpoint_type }}</strong><br>
+        Ollama Endpoint: <strong>{{ endpoint }}</strong><br>
        Model: <strong>{{ model }}</strong><br>
        Video Format: <strong>{{ video_format }}</strong>
    </div>
-    <p>Send POST requests to <strong>http://localhost:5005/webhook</strong></p>
+
+    <div class="status {{ status_class }}">
+        <strong>Ollama Status:</strong> {{ status_message }}<br>
+        {% if models %}
+        <strong>Available Models:</strong> {{ models|join(', ') }}
+        {% endif %}
+    </div>
+
+    <p><strong>Send requests to:</strong> http://localhost:5000/webhook</p>
    <button class="clear-btn" onclick="clearRequests()">Clear All</button>
+
    <div id="requests">
        {% for req in requests %}
        <div class="request">
@@ -351,35 +235,31 @@ HTML_TEMPLATE = """
                <span class="method">{{ req.method }}</span>
                <span class="timestamp">{{ req.timestamp }}</span>
                {% if req.forwarded %}
-                <span class="forwarded">FORWARDED ({{ req.endpoint_type }})</span>
+                <span class="forwarded">SENT TO OLLAMA</span>
                {% endif %}
                {% if req.error %}
                <span class="error-badge">ERROR</span>
                {% endif %}
            </div>
            <div><strong>Path:</strong> {{ req.path }}</div>
-            {% if req.query_params %}
-            <div><strong>Query Parameters:</strong></div>
-            <pre>{{ req.query_params }}</pre>
-            {% endif %}
            {% if req.body %}
-            <div><strong>Incoming Body (Gemini Format):</strong></div>
+            <div><strong>Incoming (Gemini Format):</strong></div>
            <pre>{{ req.body }}</pre>
            {% endif %}
-            {% if req.converted_request %}
-            <div><strong>Converted Request:</strong></div>
-            <pre>{{ req.converted_request }}</pre>
+            {% if req.ollama_request %}
+            <div><strong>Sent to Ollama:</strong></div>
+            <pre>{{ req.ollama_request }}</pre>
            {% endif %}
-            {% if req.raw_response %}
-            <div><strong>Raw Response:</strong></div>
-            <pre>{{ req.raw_response }}</pre>
+            {% if req.ollama_response %}
+            <div><strong>Ollama Raw Response:</strong></div>
+            <pre>{{ req.ollama_response }}</pre>
            {% endif %}
            {% if req.response %}
            <div><strong>Final Response (Gemini Format):</strong></div>
            <pre>{{ req.response }}</pre>
            {% endif %}
            {% if req.error %}
-            <div><strong>Error:</strong></div>
+            <div><strong>Error Details:</strong></div>
            <pre style="color: red;">{{ req.error }}</pre>
            {% endif %}
        </div>
@@ -392,183 +272,121 @@ HTML_TEMPLATE = """

@app.route("/")
def index():
-    endpoint_type = (
-        ENDPOINT_TYPE
-        if ENDPOINT_TYPE != "auto"
-        else detect_endpoint_type(OPENAI_ENDPOINT)
-    )
+    connected, models = test_ollama_connection()
+    status_class = "connected" if connected else "error"
+    status_message = "Connected ✅" if connected else "Cannot connect ❌"
+
    return render_template_string(
        HTML_TEMPLATE,
        requests=reversed(recent_requests),
-        endpoint=OPENAI_ENDPOINT,
-        endpoint_type=endpoint_type,
-        model=OPENAI_MODEL,
+        endpoint=OLLAMA_ENDPOINT,
+        model=OLLAMA_MODEL,
        video_format=VIDEO_FORMAT,
+        status_class=status_class,
+        status_message=status_message,
+        models=models,
    )


@app.route("/webhook", methods=["POST", "PUT", "PATCH"], defaults={"subpath": ""})
@app.route("/webhook/<path:subpath>", methods=["POST", "PUT", "PATCH"])
def webhook(subpath):
-    """Accept POST/PUT/PATCH requests, forward to AI endpoint, and return response"""
+    """Accept requests and forward to Ollama /api/generate"""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-    # Get full path with query parameters
    full_path = request.full_path if request.query_string else request.path

    print(f"\n{'='*60}")
    print(f"[{timestamp}] INCOMING {request.method} {full_path}")
-    print(f"Matched route with subpath: '{subpath}'")
    print(f"{'='*60}")

-    # Detect endpoint type
-    endpoint_type = (
-        ENDPOINT_TYPE
-        if ENDPOINT_TYPE != "auto"
-        else detect_endpoint_type(OPENAI_ENDPOINT)
-    )
-    print(f"Detected endpoint type: {endpoint_type}")
-
    # Get request data
    try:
-        body = request.get_data(as_text=True)
        gemini_request = request.get_json() if request.is_json else {}
-        body_display = json.dumps(gemini_request, indent=2) if gemini_request else body
+        body_display = (
+            json.dumps(gemini_request, indent=2) if gemini_request else "No JSON body"
+        )
    except Exception as e:
-        body_display = str(request.get_data())
+        body_display = f"Error parsing JSON: {e}"
        gemini_request = {}

-    # Print request details
-    if request.args:
-        print("Query Parameters:")
-        for key, value in request.args.items():
-            print(f" {key}: {value}")
-    print(
-        f"Body preview:\n{body_display[:500]}{'...' if len(body_display) > 500 else ''}"
-    )
+    print(f"Request body: {body_display[:300]}...")

-    # Store request info for monitoring
+    # Store request info
    req_info = {
        "timestamp": timestamp,
        "method": request.method,
        "path": full_path,
-        "query_params": dict(request.args),
        "body": body_display,
        "forwarded": False,
        "response": None,
        "error": None,
-        "endpoint_type": endpoint_type,
-        "converted_request": None,
-        "raw_response": None,
+        "ollama_request": None,
+        "ollama_response": None,
    }

-    # Try to forward to AI endpoint
    try:
        if gemini_request:
-            print(f"\n{'='*60}")
-            print(f"CONVERTING AND FORWARDING TO {endpoint_type.upper()} ENDPOINT")
-            print(f"Target: {OPENAI_ENDPOINT}")
-            print(f"{'='*60}")
-
-            # Convert based on endpoint type
-            if endpoint_type == "ollama":
-                converted_request = convert_gemini_to_ollama(gemini_request)
-            else:  # openai
-                converted_request = convert_gemini_to_openai(gemini_request)
-
-            # Log the converted request (truncate base64 for readability)
-            logged_request = json.loads(json.dumps(converted_request))
-            if endpoint_type == "openai":
-                for msg in logged_request.get("messages", []):
-                    if isinstance(msg.get("content"), list):
-                        for item in msg["content"]:
-                            if item.get("type") == "image_url":
-                                url = item["image_url"]["url"]
-                                if len(url) > 100:
-                                    item["image_url"]["url"] = (
-                                        url[:50] + "...[base64 data]..." + url[-20:]
-                                    )
-            elif "images" in logged_request:
-                # Truncate Ollama images
-                for i, img in enumerate(logged_request["images"]):
-                    if len(img) > 100:
-                        logged_request["images"][i] = (
-                            img[:50] + "...[base64 data]..." + img[-20:]
-                        )
+            print(f"\n🔄 Converting to Ollama format...")

-            print(f"Converted request:\n{json.dumps(logged_request, indent=2)}")
+            # Convert to Ollama format
+            ollama_request = convert_gemini_to_ollama_generate(gemini_request)
+            req_info["ollama_request"] = json.dumps(ollama_request, indent=2)

-            # Forward to endpoint
-            headers = {
-                "Content-Type": "application/json",
-            }
-            if (
-                OPENAI_API_KEY
-                and OPENAI_API_KEY != "none"
-                and endpoint_type == "openai"
-            ):
-                headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"
+            print(f"Ollama request: {json.dumps(ollama_request, indent=2)}")
+
+            # Send to Ollama
+            print(f"\n📤 Sending to Ollama: {OLLAMA_ENDPOINT}/api/generate")

-            print(f"Sending request to {OPENAI_ENDPOINT}...")
            response = requests.post(
-                OPENAI_ENDPOINT, json=converted_request, headers=headers, timeout=120
+                f"{OLLAMA_ENDPOINT}/api/generate",
+                json=ollama_request,
+                headers={"Content-Type": "application/json"},
+                timeout=120,
            )

-            print(f"\nResponse Status: {response.status_code}")
-            print(f"Response Headers: {dict(response.headers)}")
+            print(f"📥 Ollama response status: {response.status_code}")

            if response.status_code == 200:
-                raw_response = response.json()
-                print(f"Raw Response:\n{json.dumps(raw_response, indent=2)[:1000]}...")
+                ollama_response = response.json()
+                req_info["ollama_response"] = json.dumps(ollama_response, indent=2)

-                # Convert back to Gemini format based on endpoint type
-                if endpoint_type == "ollama":
-                    gemini_response = convert_ollama_to_gemini(raw_response)
-                else:  # openai
-                    gemini_response = convert_openai_to_gemini(raw_response)
+                print(f"✅ Ollama responded successfully")

-                print(
-                    f"\nConverted Gemini Response:\n{json.dumps(gemini_response, indent=2)[:1000]}..."
-                )
+                # Convert back to Gemini format
+                gemini_response = convert_ollama_generate_to_gemini(ollama_response)

-                req_info["forwarded"] = True
-                req_info["response"] = json.dumps(gemini_response, indent=2)
-                req_info["converted_request"] = json.dumps(logged_request, indent=2)
-                req_info["raw_response"] = json.dumps(raw_response, indent=2)[:2000] + (
-                    "..." if len(json.dumps(raw_response, indent=2)) > 2000 else ""
-                )
+                if "error" in gemini_response:
+                    req_info["error"] = gemini_response["error"]
+                    req_info["response"] = json.dumps(gemini_response, indent=2)
+                else:
+                    req_info["forwarded"] = True
+                    req_info["response"] = json.dumps(gemini_response, indent=2)
+                    print(f"✅ Conversion successful")

                recent_requests.append(req_info)
                if len(recent_requests) > MAX_REQUESTS:
                    recent_requests.pop(0)

-                print(f"{'='*60}\n")
                return jsonify(gemini_response), 200
+
            else:
-                # Get detailed error
-                try:
-                    error_data = response.json()
-                    error_msg = json.dumps(error_data, indent=2)
-                except:
-                    error_msg = response.text
-
-                full_error = f"{endpoint_type.upper()} endpoint returned {response.status_code}:\n{error_msg}"
-                print(f"ERROR: {full_error}")
-                req_info["error"] = full_error
+                error_text = response.text
+                error_msg = f"Ollama returned {response.status_code}: {error_text}"
+                print(f"❌ {error_msg}")
+
+                req_info["error"] = error_msg
+                req_info["ollama_response"] = error_text
                req_info["forwarded"] = True
-                req_info["converted_request"] = json.dumps(logged_request, indent=2)
-                req_info["raw_response"] = error_msg

                recent_requests.append(req_info)
                if len(recent_requests) > MAX_REQUESTS:
                    recent_requests.pop(0)

-                print(f"{'='*60}\n")
                return (
                    jsonify(
                        {
                            "error": {
-                                "message": error_msg,
+                                "message": error_text,
                                "status": response.status_code,
                            }
                        }
@@ -576,29 +394,19 @@ def webhook(subpath):
                    response.status_code,
                )
        else:
-            # No JSON body, just acknowledge
-            req_info["error"] = "No JSON body to forward"
+            req_info["error"] = "No JSON body received"
            recent_requests.append(req_info)
            if len(recent_requests) > MAX_REQUESTS:
                recent_requests.pop(0)

-            print(f"{'='*60}\n")
            return (
-                jsonify(
-                    {
-                        "status": "success",
-                        "message": "Request received but not forwarded (no JSON body)",
-                        "timestamp": timestamp,
-                    }
-                ),
-                200,
+                jsonify({"status": "error", "message": "No JSON body to process"}),
+                400,
            )

    except Exception as e:
-        error_msg = f"Error processing request: {str(e)}"
-        print(f"ERROR: {error_msg}")
-        import traceback
-
+        error_msg = f"Exception: {str(e)}"
+        print(f"❌ {error_msg}")
        traceback.print_exc()

        req_info["error"] = error_msg
@@ -606,7 +414,6 @@ def webhook(subpath):
        if len(recent_requests) > MAX_REQUESTS:
            recent_requests.pop(0)

-        print(f"{'='*60}\n")
        return jsonify({"error": {"message": error_msg}}), 500

@@ -617,52 +424,31 @@ def clear():
    return jsonify({"status": "cleared"}), 200


-@app.errorhandler(404)
-def not_found(e):
-    """Handle 404 errors with helpful message"""
-    print(f"\n❌ 404 ERROR: {request.method} {request.path}")
-    print(f" Query string: {request.query_string.decode()}")
-    print(f" Full path: {request.full_path}")
-    print(f" Available routes:")
-    for rule in app.url_map.iter_rules():
-        print(f" - {rule.methods} {rule.rule}")
-    return (
-        jsonify(
-            {
-                "error": "Not Found",
-                "message": f"The path {request.path} was not found",
-                "hint": "POST requests should go to /webhook or /webhook/<path>",
-            }
-        ),
-        404,
+@app.route("/test")
+def test_endpoint():
+    """Test endpoint to verify Ollama connection"""
+    connected, models = test_ollama_connection()
+    return jsonify(
+        {"connected": connected, "endpoint": OLLAMA_ENDPOINT, "models": models}
    )


if __name__ == "__main__":
-    print("🚀 POST Request Monitor & AI Proxy starting...")
-    print("📍 Web UI: http://localhost:5005")
-    print("📮 Webhook endpoint: http://localhost:5005/webhook")
-    print(
-        "📮 Example: http://localhost:5005/webhook/models/model:generateContent?key=none"
-    )
-    print(f"🔗 Forwarding to: {OPENAI_ENDPOINT}")
-    print(f"🤖 Model: {OPENAI_MODEL}")
-    print(f"📹 Video format: {VIDEO_FORMAT}")
-    endpoint_type = (
-        ENDPOINT_TYPE
-        if ENDPOINT_TYPE != "auto"
-        else detect_endpoint_type(OPENAI_ENDPOINT)
-    )
-    print(f"🔧 Endpoint type: {endpoint_type}")
-    print("\n" + "=" * 60)
-    print("CONFIGURATION OPTIONS:")
-    print("Set these environment variables to configure:")
-    print(" OPENAI_ENDPOINT - Target endpoint URL")
-    print(" ENDPOINT_TYPE - 'openai', 'ollama', or 'auto' (default)")
-    print(" OPENAI_MODEL - Model name")
-    print(" VIDEO_FORMAT - 'openai', 'vllm', 'skip', or 'error'")
-    print("\nFor Ollama:")
-    print(" OpenAI-compatible: http://localhost:11434/v1/chat/completions")
-    print(" Native format: http://localhost:11434/api/generate")
-    print("=" * 60)
+    print("🦙 Ollama Proxy Server Starting...")
+    print(f"🔗 Ollama Endpoint: {OLLAMA_ENDPOINT}")
+    print(f"🤖 Model: {OLLAMA_MODEL}")
+    print(f"📹 Video Format: {VIDEO_FORMAT}")
+    print("📍 Web UI: http://localhost:5000")
+    print("📮 Webhook: http://localhost:5000/webhook")
+    print("🧪 Test: http://localhost:5000/test")
+
+    # Test connection on startup
+    connected, models = test_ollama_connection()
+    if not connected:
+        print("\n⚠️ WARNING: Cannot connect to Ollama!")
+        print("Please check:")
+        print("1. Is Ollama running? (ollama serve)")
+        print("2. Is it on the right port?")
+        print("3. Set OLLAMA_ENDPOINT env var if different")
+
    app.run(host="0.0.0.0", port=5000, debug=True)
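For anyone trying this change locally, here is a minimal smoke-test sketch (not part of the patch). It assumes the proxy is running on port 5000 and that Ollama is reachable with the configured model:

```python
import requests

# Gemini-style payload, matching what the proxy's /webhook route parses
# (contents -> parts -> text, plus generationConfig).
payload = {
    "contents": [{"parts": [{"text": "Say hello in one short sentence."}]}],
    "generationConfig": {"temperature": 0.2, "maxOutputTokens": 64},
}

resp = requests.post("http://localhost:5000/webhook", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()

# The proxy answers in Gemini format: candidates -> content -> parts -> text.
print(data["candidates"][0]["content"]["parts"][0]["text"])
```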