adri 1 month ago
parent commit efe04cadd4
1 changed file with 163 additions and 377 deletions
app.py

@@ -3,6 +3,7 @@ from datetime import datetime
 import json
 import os
 import requests
+import traceback
 
 app = Flask(__name__)
 
@@ -10,140 +11,37 @@ app = Flask(__name__)
 recent_requests = []
 MAX_REQUESTS = 50
 
-# OpenAI endpoint configuration
-OPENAI_ENDPOINT = os.getenv(
-    "OPENAI_ENDPOINT", "http://localhost:11434/v1/chat/completions"
-)
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "none")
-OPENAI_MODEL = os.getenv("OPENAI_MODEL", "InternVL3_5-14B")
-# Video format: 'openai' (data URL), 'vllm' (try vLLM format), 'skip', or 'error'
-VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "openai")
-
-# NEW: Endpoint type detection
-ENDPOINT_TYPE = os.getenv("ENDPOINT_TYPE", "auto")  # 'openai', 'ollama', or 'auto'
-
-
-def detect_endpoint_type(endpoint_url):
-    """Auto-detect if endpoint is OpenAI-compatible or Ollama native"""
-    if "/v1/chat/completions" in endpoint_url:
-        return "openai"
-    elif "/api/generate" in endpoint_url or "/api/chat" in endpoint_url:
-        return "ollama"
-    elif "localhost:11434" in endpoint_url or "ollama" in endpoint_url.lower():
-        return "openai"  # Assume OpenAI-compatible for Ollama
-    else:
-        return "openai"  # Default to OpenAI format
-
-
-def convert_gemini_to_openai(gemini_request):
-    """Convert Gemini API format to OpenAI API format"""
-    try:
-        contents = gemini_request.get("contents", [])
-        messages = []
-        media_info = {"images": [], "videos": []}
-
-        for content in contents:
-            parts = content.get("parts", [])
-            message_content = []
-
-            for part in parts:
-                # Handle text parts
-                if "text" in part:
-                    message_content.append({"type": "text", "text": part["text"]})
-
-                # Handle inline_data (images/video)
-                elif "inline_data" in part:
-                    inline = part["inline_data"]
-                    mime_type = inline.get("mime_type", "")
-                    data = inline.get("data", "")
-
-                    if mime_type.startswith("image/"):
-                        # Images: Universally supported across all runners
-                        media_info["images"].append(mime_type)
-                        print(f"🖼️  Adding image: {mime_type}")
-                        message_content.append(
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:{mime_type};base64,{data}",
-                                    "detail": "auto",
-                                },
-                            }
-                        )
-
-                    elif mime_type.startswith("video/"):
-                        # Videos: Format depends on VIDEO_FORMAT setting
-                        if VIDEO_FORMAT == "skip":
-                            media_info["videos"].append(f"skipped ({mime_type})")
-                            print(f"⏭️  Skipping video: {mime_type} (VIDEO_FORMAT=skip)")
-                            message_content.append(
-                                {
-                                    "type": "text",
-                                    "text": f"[Video content ({mime_type}) was present but skipped]",
-                                }
-                            )
-
-                        elif VIDEO_FORMAT == "error":
-                            raise ValueError(
-                                f"Video content detected ({mime_type}) but VIDEO_FORMAT=error"
-                            )
-
-                        else:  # 'openai', 'vllm', or any other value
-                            media_info["videos"].append(
-                                f"format: {VIDEO_FORMAT} ({mime_type})"
-                            )
-                            print(
-                                f"📹 Adding video ({VIDEO_FORMAT} format): {mime_type}"
-                            )
-                            message_content.append(
-                                {
-                                    "type": "image_url",
-                                    "image_url": {
-                                        "url": f"data:{mime_type};base64,{data}",
-                                        "detail": "auto",
-                                    },
-                                }
-                            )
-
-            # Add as user message
-            # If only one content item and it's text, send as string for better compatibility
-            if len(message_content) == 1 and message_content[0].get("type") == "text":
-                messages.append({"role": "user", "content": message_content[0]["text"]})
-            else:
-                messages.append({"role": "user", "content": message_content})
+# Configuration with better defaults
+OLLAMA_ENDPOINT = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
+OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama2")  # More common default
+VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "skip")  # Skip all media by default for debugging
 
-        # Build OpenAI request
-        openai_request = {"model": OPENAI_MODEL, "messages": messages}
 
-        # Add generation config as OpenAI parameters
-        gen_config = gemini_request.get("generationConfig", {})
-        if "maxOutputTokens" in gen_config:
-            openai_request["max_tokens"] = gen_config["maxOutputTokens"]
-        if "temperature" in gen_config:
-            openai_request["temperature"] = gen_config["temperature"]
-
-        # Log media summary
-        if media_info["images"] or media_info["videos"]:
-            print(f"📊 Media summary:")
-            if media_info["images"]:
-                print(
-                    f"   Images: {len(media_info['images'])} ({', '.join(media_info['images'])})"
-                )
-            if media_info["videos"]:
-                print(f"   Videos: {', '.join(media_info['videos'])}")
-
-        return openai_request
+def test_ollama_connection():
+    """Test connection to Ollama and get available models"""
+    try:
+        # Try to get model list
+        response = requests.get(f"{OLLAMA_ENDPOINT}/api/tags", timeout=5)
+        if response.status_code == 200:
+            models = response.json().get("models", [])
+            model_names = [m.get("name", "") for m in models]
+            print(f"✅ Connected to Ollama at {OLLAMA_ENDPOINT}")
+            print(f"Available models: {model_names}")
+            return True, model_names
+        else:
+            print(f"❌ Ollama responded with {response.status_code}: {response.text}")
+            return False, []
     except Exception as e:
-        print(f"❌ Error converting request: {e}")
-        raise
+        print(f"❌ Cannot connect to Ollama at {OLLAMA_ENDPOINT}: {e}")
+        return False, []
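
For reference, Ollama's GET /api/tags returns a JSON object with a "models" list, one entry per locally installed model; test_ollama_connection() keeps only the "name" field of each entry. A minimal sketch of the shape (names and sizes here are illustrative, not from the commit):

    tags_payload = {
        "models": [
            {"name": "llama2:latest", "size": 3825819519},
            {"name": "llava:latest", "size": 4733363377},
        ]
    }
    model_names = [m.get("name", "") for m in tags_payload["models"]]
    # -> ["llama2:latest", "llava:latest"]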
 
 
-def convert_gemini_to_ollama(gemini_request):
-    """Convert Gemini API format to Ollama native format"""
+def convert_gemini_to_ollama_generate(gemini_request):
+    """Convert Gemini format to Ollama /api/generate format"""
     try:
         contents = gemini_request.get("contents", [])
 
-        # Extract text and combine into a single prompt
+        # Extract text parts and combine
         prompt_parts = []
         images = []
 
@@ -158,26 +56,35 @@ def convert_gemini_to_ollama(gemini_request):
                     mime_type = inline.get("mime_type", "")
                     data = inline.get("data", "")
 
-                    if mime_type.startswith("image/") or mime_type.startswith("video/"):
-                        # Ollama expects images in a different format
-                        images.append(data)  # Just the base64 data
-                        print(f"🖼️ Adding media for Ollama: {mime_type}")
+                    if VIDEO_FORMAT == "skip" and (
+                        mime_type.startswith("video/") or mime_type.startswith("image/")
+                    ):
+                        prompt_parts.append(
+                            f"[Media content ({mime_type}) was present but skipped for debugging]"
+                        )
+                        print(f"⏭️ Skipping media: {mime_type}")
+                    elif mime_type.startswith("image/"):
+                        images.append(data)
+                        print(f"🖼️ Adding image: {mime_type}")
 
         # Build Ollama request
         ollama_request = {
-            "model": OPENAI_MODEL,
-            "prompt": " ".join(prompt_parts),
+            "model": OLLAMA_MODEL,
+            "prompt": " ".join(prompt_parts) if prompt_parts else "Hello",
             "stream": False,
+            "options": {},
         }
 
-        # Add images if present
-        if images:
+        # Add images if present and not skipping
+        if images and VIDEO_FORMAT != "skip":
             ollama_request["images"] = images
 
         # Add generation config
         gen_config = gemini_request.get("generationConfig", {})
         if "temperature" in gen_config:
-            ollama_request["options"] = {"temperature": gen_config["temperature"]}
+            ollama_request["options"]["temperature"] = gen_config["temperature"]
+        if "maxOutputTokens" in gen_config:
+            ollama_request["options"]["num_predict"] = gen_config["maxOutputTokens"]
 
         return ollama_request
     except Exception as e:
@@ -185,59 +92,22 @@ def convert_gemini_to_ollama(gemini_request):
         raise
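
To make the conversion concrete, here is a sketch of what convert_gemini_to_ollama_generate() produces for a text-only request, assuming OLLAMA_MODEL is "llama2" (input values invented for illustration):

    gemini_req = {
        "contents": [{"parts": [{"text": "Describe the weather."}]}],
        "generationConfig": {"temperature": 0.2, "maxOutputTokens": 128},
    }
    # Expected output of convert_gemini_to_ollama_generate(gemini_req):
    # {
    #     "model": "llama2",
    #     "prompt": "Describe the weather.",
    #     "stream": False,
    #     "options": {"temperature": 0.2, "num_predict": 128},
    # }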
 
 
-def convert_openai_to_gemini(openai_response):
-    """Convert OpenAI API response to Gemini API format"""
+def convert_ollama_generate_to_gemini(ollama_response):
+    """Convert Ollama /api/generate response to Gemini format"""
     try:
-        # Extract the message content
-        choices = openai_response.get("choices", [])
-        if not choices:
-            print(f"❌ No choices in OpenAI response: {openai_response}")
-            return {"error": "No response generated"}
-
-        message = choices[0].get("message", {})
-        content = message.get("content", "")
-
-        if not content:
-            print(f"❌ No content in message: {message}")
-            return {"error": "No response generated"}
-
-        # Convert to Gemini format
-        gemini_response = {
-            "candidates": [
-                {
-                    "content": {"parts": [{"text": content}], "role": "model"},
-                    "finishReason": "STOP",
-                    "index": 0,
-                }
-            ],
-            "usageMetadata": {
-                "promptTokenCount": openai_response.get("usage", {}).get(
-                    "prompt_tokens", 0
-                ),
-                "candidatesTokenCount": openai_response.get("usage", {}).get(
-                    "completion_tokens", 0
-                ),
-                "totalTokenCount": openai_response.get("usage", {}).get(
-                    "total_tokens", 0
-                ),
-            },
-        }
-
-        return gemini_response
-    except Exception as e:
-        print(f"❌ Error converting OpenAI response: {e}")
-        raise
-
+        print(
+            f"🔄 Converting Ollama response: {json.dumps(ollama_response, indent=2)[:500]}..."
+        )
 
-def convert_ollama_to_gemini(ollama_response):
-    """Convert Ollama native response to Gemini API format"""
-    try:
         # Ollama /api/generate returns: {"response": "text", "done": true, ...}
         response_text = ollama_response.get("response", "")
 
         if not response_text:
-            print(f"❌ No response text in Ollama response: {ollama_response}")
-            return {"error": "No response generated"}
+            print(f"❌ No 'response' field in Ollama response")
+            print(f"Full response keys: {list(ollama_response.keys())}")
+            return {"error": "No response text from Ollama"}
+
+        print(f"✅ Found response text: {response_text[:100]}...")
 
         # Convert to Gemini format
         gemini_response = {
@@ -259,18 +129,23 @@ def convert_ollama_to_gemini(ollama_response):
         return gemini_response
     except Exception as e:
         print(f"❌ Error converting Ollama response: {e}")
-        raise
+        print(f"Ollama response was: {ollama_response}")
+        traceback.print_exc()
+        return {"error": f"Conversion error: {str(e)}"}
 
 
 HTML_TEMPLATE = """
 <!DOCTYPE html>
 <html>
 <head>
-    <title>POST Request Monitor</title>
+    <title>Ollama Proxy Debug</title>
     <style>
         body { font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }
         h1 { color: #333; }
-        .config { background: #e3f2fd; padding: 10px; margin: 10px 0; border-radius: 5px; }
+        .config { background: #e3f2fd; padding: 15px; margin: 10px 0; border-radius: 5px; }
+        .status { padding: 10px; margin: 10px 0; border-radius: 5px; }
+        .status.connected { background: #c8e6c9; }
+        .status.error { background: #ffcdd2; }
         .request { 
             background: white; 
             padding: 15px; 
@@ -329,21 +204,30 @@ HTML_TEMPLATE = """
             fetch('/clear', { method: 'POST' })
                 .then(() => location.reload());
         }
-        // Auto-refresh every 3 seconds
-        setTimeout(() => location.reload(), 3000);
+        // Auto-refresh every 5 seconds
+        setTimeout(() => location.reload(), 5000);
     </script>
 </head>
 <body>
-    <h1>📬 POST Request Monitor & AI Proxy</h1>
+    <h1>🦙 Ollama Proxy Debug Interface</h1>
+    
     <div class="config">
         <strong>Configuration:</strong><br>
-        Endpoint: <strong>{{ endpoint }}</strong><br>
-        Type: <strong>{{ endpoint_type }}</strong><br>
+        Ollama Endpoint: <strong>{{ endpoint }}</strong><br>
         Model: <strong>{{ model }}</strong><br>
         Video Format: <strong>{{ video_format }}</strong>
     </div>
-    <p>Send POST requests to <strong>http://localhost:5005/webhook</strong></p>
+    
+    <div class="status {{ status_class }}">
+        <strong>Ollama Status:</strong> {{ status_message }}<br>
+        {% if models %}
+        <strong>Available Models:</strong> {{ models|join(', ') }}
+        {% endif %}
+    </div>
+    
+    <p><strong>Send requests to:</strong> http://localhost:5000/webhook</p>
     <button class="clear-btn" onclick="clearRequests()">Clear All</button>
+    
     <div id="requests">
         {% for req in requests %}
         <div class="request">
@@ -351,35 +235,31 @@ HTML_TEMPLATE = """
                 <span class="method">{{ req.method }}</span>
                 <span class="timestamp">{{ req.timestamp }}</span>
                 {% if req.forwarded %}
-                <span class="forwarded">FORWARDED ({{ req.endpoint_type }})</span>
+                <span class="forwarded">SENT TO OLLAMA</span>
                 {% endif %}
                 {% if req.error %}
                 <span class="error-badge">ERROR</span>
                 {% endif %}
             </div>
             <div><strong>Path:</strong> {{ req.path }}</div>
-            {% if req.query_params %}
-            <div><strong>Query Parameters:</strong></div>
-            <pre>{{ req.query_params }}</pre>
-            {% endif %}
             {% if req.body %}
-            <div><strong>Incoming Body (Gemini Format):</strong></div>
+            <div><strong>Incoming (Gemini Format):</strong></div>
             <pre>{{ req.body }}</pre>
             {% endif %}
-            {% if req.converted_request %}
-            <div><strong>Converted Request:</strong></div>
-            <pre>{{ req.converted_request }}</pre>
+            {% if req.ollama_request %}
+            <div><strong>Sent to Ollama:</strong></div>
+            <pre>{{ req.ollama_request }}</pre>
             {% endif %}
-            {% if req.raw_response %}
-            <div><strong>Raw Response:</strong></div>
-            <pre>{{ req.raw_response }}</pre>
+            {% if req.ollama_response %}
+            <div><strong>Ollama Raw Response:</strong></div>
+            <pre>{{ req.ollama_response }}</pre>
             {% endif %}
             {% if req.response %}
             <div><strong>Final Response (Gemini Format):</strong></div>
             <pre>{{ req.response }}</pre>
             {% endif %}
             {% if req.error %}
-            <div><strong>Error:</strong></div>
+            <div><strong>Error Details:</strong></div>
             <pre style="color: red;">{{ req.error }}</pre>
             {% endif %}
         </div>
@@ -392,183 +272,121 @@ HTML_TEMPLATE = """
 
 @app.route("/")
 def index():
-    endpoint_type = (
-        ENDPOINT_TYPE
-        if ENDPOINT_TYPE != "auto"
-        else detect_endpoint_type(OPENAI_ENDPOINT)
-    )
+    connected, models = test_ollama_connection()
+    status_class = "connected" if connected else "error"
+    status_message = "Connected ✅" if connected else "Cannot connect ❌"
+
     return render_template_string(
         HTML_TEMPLATE,
         requests=reversed(recent_requests),
-        endpoint=OPENAI_ENDPOINT,
-        endpoint_type=endpoint_type,
-        model=OPENAI_MODEL,
+        endpoint=OLLAMA_ENDPOINT,
+        model=OLLAMA_MODEL,
         video_format=VIDEO_FORMAT,
+        status_class=status_class,
+        status_message=status_message,
+        models=models,
     )
 
 
 @app.route("/webhook", methods=["POST", "PUT", "PATCH"], defaults={"subpath": ""})
 @app.route("/webhook/<path:subpath>", methods=["POST", "PUT", "PATCH"])
 def webhook(subpath):
-    """Accept POST/PUT/PATCH requests, forward to AI endpoint, and return response"""
+    """Accept requests and forward to Ollama /api/generate"""
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-    # Get full path with query parameters
     full_path = request.full_path if request.query_string else request.path
 
     print(f"\n{'='*60}")
     print(f"[{timestamp}] INCOMING {request.method} {full_path}")
-    print(f"Matched route with subpath: '{subpath}'")
     print(f"{'='*60}")
 
-    # Detect endpoint type
-    endpoint_type = (
-        ENDPOINT_TYPE
-        if ENDPOINT_TYPE != "auto"
-        else detect_endpoint_type(OPENAI_ENDPOINT)
-    )
-    print(f"Detected endpoint type: {endpoint_type}")
-
     # Get request data
     try:
-        body = request.get_data(as_text=True)
         gemini_request = request.get_json() if request.is_json else {}
-        body_display = json.dumps(gemini_request, indent=2) if gemini_request else body
+        body_display = (
+            json.dumps(gemini_request, indent=2) if gemini_request else "No JSON body"
+        )
     except Exception as e:
-        body_display = str(request.get_data())
+        body_display = f"Error parsing JSON: {e}"
         gemini_request = {}
 
-    # Print request details
-    if request.args:
-        print("Query Parameters:")
-        for key, value in request.args.items():
-            print(f"  {key}: {value}")
-    print(
-        f"Body preview:\n{body_display[:500]}{'...' if len(body_display) > 500 else ''}"
-    )
+    print(f"Request body: {body_display[:300]}...")
 
-    # Store request info for monitoring
+    # Store request info
     req_info = {
         "timestamp": timestamp,
         "method": request.method,
         "path": full_path,
-        "query_params": dict(request.args),
         "body": body_display,
         "forwarded": False,
         "response": None,
         "error": None,
-        "endpoint_type": endpoint_type,
-        "converted_request": None,
-        "raw_response": None,
+        "ollama_request": None,
+        "ollama_response": None,
     }
 
-    # Try to forward to AI endpoint
     try:
         if gemini_request:
-            print(f"\n{'='*60}")
-            print(f"CONVERTING AND FORWARDING TO {endpoint_type.upper()} ENDPOINT")
-            print(f"Target: {OPENAI_ENDPOINT}")
-            print(f"{'='*60}")
-
-            # Convert based on endpoint type
-            if endpoint_type == "ollama":
-                converted_request = convert_gemini_to_ollama(gemini_request)
-            else:  # openai
-                converted_request = convert_gemini_to_openai(gemini_request)
-
-            # Log the converted request (truncate base64 for readability)
-            logged_request = json.loads(json.dumps(converted_request))
-            if endpoint_type == "openai":
-                for msg in logged_request.get("messages", []):
-                    if isinstance(msg.get("content"), list):
-                        for item in msg["content"]:
-                            if item.get("type") == "image_url":
-                                url = item["image_url"]["url"]
-                                if len(url) > 100:
-                                    item["image_url"]["url"] = (
-                                        url[:50] + "...[base64 data]..." + url[-20:]
-                                    )
-            elif "images" in logged_request:
-                # Truncate Ollama images
-                for i, img in enumerate(logged_request["images"]):
-                    if len(img) > 100:
-                        logged_request["images"][i] = (
-                            img[:50] + "...[base64 data]..." + img[-20:]
-                        )
+            print(f"\n🔄 Converting to Ollama format...")
 
-            print(f"Converted request:\n{json.dumps(logged_request, indent=2)}")
+            # Convert to Ollama format
+            ollama_request = convert_gemini_to_ollama_generate(gemini_request)
+            req_info["ollama_request"] = json.dumps(ollama_request, indent=2)
 
-            # Forward to endpoint
-            headers = {
-                "Content-Type": "application/json",
-            }
-            if (
-                OPENAI_API_KEY
-                and OPENAI_API_KEY != "none"
-                and endpoint_type == "openai"
-            ):
-                headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"
+            print(f"Ollama request: {json.dumps(ollama_request, indent=2)}")
+
+            # Send to Ollama
+            print(f"\n📤 Sending to Ollama: {OLLAMA_ENDPOINT}/api/generate")
 
-            print(f"Sending request to {OPENAI_ENDPOINT}...")
             response = requests.post(
-                OPENAI_ENDPOINT, json=converted_request, headers=headers, timeout=120
+                f"{OLLAMA_ENDPOINT}/api/generate",
+                json=ollama_request,
+                headers={"Content-Type": "application/json"},
+                timeout=120,
             )
 
-            print(f"\nResponse Status: {response.status_code}")
-            print(f"Response Headers: {dict(response.headers)}")
+            print(f"📥 Ollama response status: {response.status_code}")
 
             if response.status_code == 200:
-                raw_response = response.json()
-                print(f"Raw Response:\n{json.dumps(raw_response, indent=2)[:1000]}...")
+                ollama_response = response.json()
+                req_info["ollama_response"] = json.dumps(ollama_response, indent=2)
 
-                # Convert back to Gemini format based on endpoint type
-                if endpoint_type == "ollama":
-                    gemini_response = convert_ollama_to_gemini(raw_response)
-                else:  # openai
-                    gemini_response = convert_openai_to_gemini(raw_response)
+                print(f"✅ Ollama responded successfully")
 
-                print(
-                    f"\nConverted Gemini Response:\n{json.dumps(gemini_response, indent=2)[:1000]}..."
-                )
+                # Convert back to Gemini format
+                gemini_response = convert_ollama_generate_to_gemini(ollama_response)
 
-                req_info["forwarded"] = True
-                req_info["response"] = json.dumps(gemini_response, indent=2)
-                req_info["converted_request"] = json.dumps(logged_request, indent=2)
-                req_info["raw_response"] = json.dumps(raw_response, indent=2)[:2000] + (
-                    "..." if len(json.dumps(raw_response, indent=2)) > 2000 else ""
-                )
+                if "error" in gemini_response:
+                    req_info["error"] = gemini_response["error"]
+                    req_info["response"] = json.dumps(gemini_response, indent=2)
+                else:
+                    req_info["forwarded"] = True
+                    req_info["response"] = json.dumps(gemini_response, indent=2)
+                    print(f"✅ Conversion successful")
 
                 recent_requests.append(req_info)
                 if len(recent_requests) > MAX_REQUESTS:
                     recent_requests.pop(0)
 
-                print(f"{'='*60}\n")
                 return jsonify(gemini_response), 200
+
             else:
-                # Get detailed error
-                try:
-                    error_data = response.json()
-                    error_msg = json.dumps(error_data, indent=2)
-                except:
-                    error_msg = response.text
-
-                full_error = f"{endpoint_type.upper()} endpoint returned {response.status_code}:\n{error_msg}"
-                print(f"ERROR: {full_error}")
-                req_info["error"] = full_error
+                error_text = response.text
+                error_msg = f"Ollama returned {response.status_code}: {error_text}"
+                print(f"❌ {error_msg}")
+
+                req_info["error"] = error_msg
+                req_info["ollama_response"] = error_text
                 req_info["forwarded"] = True
-                req_info["converted_request"] = json.dumps(logged_request, indent=2)
-                req_info["raw_response"] = error_msg
 
                 recent_requests.append(req_info)
                 if len(recent_requests) > MAX_REQUESTS:
                     recent_requests.pop(0)
 
-                print(f"{'='*60}\n")
                 return (
                     jsonify(
                         {
                             "error": {
-                                "message": error_msg,
+                                "message": error_text,
                                 "status": response.status_code,
                             }
                         }
@@ -576,29 +394,19 @@ def webhook(subpath):
                     response.status_code,
                 )
         else:
-            # No JSON body, just acknowledge
-            req_info["error"] = "No JSON body to forward"
+            req_info["error"] = "No JSON body received"
             recent_requests.append(req_info)
             if len(recent_requests) > MAX_REQUESTS:
                 recent_requests.pop(0)
 
-            print(f"{'='*60}\n")
             return (
-                jsonify(
-                    {
-                        "status": "success",
-                        "message": "Request received but not forwarded (no JSON body)",
-                        "timestamp": timestamp,
-                    }
-                ),
-                200,
+                jsonify({"status": "error", "message": "No JSON body to process"}),
+                400,
             )
 
     except Exception as e:
-        error_msg = f"Error processing request: {str(e)}"
-        print(f"ERROR: {error_msg}")
-        import traceback
-
+        error_msg = f"Exception: {str(e)}"
+        print(f"❌ {error_msg}")
         traceback.print_exc()
 
         req_info["error"] = error_msg
@@ -606,7 +414,6 @@ def webhook(subpath):
         if len(recent_requests) > MAX_REQUESTS:
             recent_requests.pop(0)
 
-        print(f"{'='*60}\n")
         return jsonify({"error": {"message": error_msg}}), 500
 
 
@@ -617,52 +424,31 @@ def clear():
     return jsonify({"status": "cleared"}), 200
 
 
-@app.errorhandler(404)
-def not_found(e):
-    """Handle 404 errors with helpful message"""
-    print(f"\n❌ 404 ERROR: {request.method} {request.path}")
-    print(f"   Query string: {request.query_string.decode()}")
-    print(f"   Full path: {request.full_path}")
-    print(f"   Available routes:")
-    for rule in app.url_map.iter_rules():
-        print(f"   - {rule.methods} {rule.rule}")
-    return (
-        jsonify(
-            {
-                "error": "Not Found",
-                "message": f"The path {request.path} was not found",
-                "hint": "POST requests should go to /webhook or /webhook/<path>",
-            }
-        ),
-        404,
+@app.route("/test")
+def test_endpoint():
+    """Test endpoint to verify Ollama connection"""
+    connected, models = test_ollama_connection()
+    return jsonify(
+        {"connected": connected, "endpoint": OLLAMA_ENDPOINT, "models": models}
     )
 
 
 if __name__ == "__main__":
-    print("🚀 POST Request Monitor & AI Proxy starting...")
-    print("📍 Web UI: http://localhost:5005")
-    print("📮 Webhook endpoint: http://localhost:5005/webhook")
-    print(
-        "📮 Example: http://localhost:5005/webhook/models/model:generateContent?key=none"
-    )
-    print(f"🔗 Forwarding to: {OPENAI_ENDPOINT}")
-    print(f"🤖 Model: {OPENAI_MODEL}")
-    print(f"📹 Video format: {VIDEO_FORMAT}")
-    endpoint_type = (
-        ENDPOINT_TYPE
-        if ENDPOINT_TYPE != "auto"
-        else detect_endpoint_type(OPENAI_ENDPOINT)
-    )
-    print(f"🔧 Endpoint type: {endpoint_type}")
-    print("\n" + "=" * 60)
-    print("CONFIGURATION OPTIONS:")
-    print("Set these environment variables to configure:")
-    print("  OPENAI_ENDPOINT - Target endpoint URL")
-    print("  ENDPOINT_TYPE - 'openai', 'ollama', or 'auto' (default)")
-    print("  OPENAI_MODEL - Model name")
-    print("  VIDEO_FORMAT - 'openai', 'vllm', 'skip', or 'error'")
-    print("\nFor Ollama:")
-    print("  OpenAI-compatible: http://localhost:11434/v1/chat/completions")
-    print("  Native format: http://localhost:11434/api/generate")
-    print("=" * 60)
+    print("🦙 Ollama Proxy Server Starting...")
+    print(f"🔗 Ollama Endpoint: {OLLAMA_ENDPOINT}")
+    print(f"🤖 Model: {OLLAMA_MODEL}")
+    print(f"📹 Video Format: {VIDEO_FORMAT}")
+    print("📍 Web UI: http://localhost:5000")
+    print("📮 Webhook: http://localhost:5000/webhook")
+    print("🧪 Test: http://localhost:5000/test")
+
+    # Test connection on startup
+    connected, models = test_ollama_connection()
+    if not connected:
+        print("\n⚠️  WARNING: Cannot connect to Ollama!")
+        print("Please check:")
+        print("1. Is Ollama running? (ollama serve)")
+        print("2. Is it on the right port?")
+        print("3. Set OLLAMA_ENDPOINT env var if different")
+
     app.run(host="0.0.0.0", port=5000, debug=True)
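
A quick end-to-end check of the proxy, assuming it is running on localhost:5000 with Ollama reachable; the payload is an invented minimal Gemini-format request:

    import requests

    payload = {
        "contents": [{"parts": [{"text": "Say hello in one sentence."}]}],
        "generationConfig": {"temperature": 0.7, "maxOutputTokens": 64},
    }
    resp = requests.post("http://localhost:5000/webhook", json=payload, timeout=120)
    print(resp.status_code)
    print(resp.json())  # Gemini-format response with "candidates"

    # Connectivity check without invoking the model:
    print(requests.get("http://localhost:5000/test").json())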