from flask import Flask, request, jsonify, render_template_string from datetime import datetime import json import os import requests app = Flask(__name__) # Store recent requests in memory recent_requests = [] MAX_REQUESTS = 50 # OpenAI endpoint configuration OPENAI_ENDPOINT = os.getenv( "OPENAI_ENDPOINT", "http://localhost:11434/v1/chat/completions" ) OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "none") OPENAI_MODEL = os.getenv("OPENAI_MODEL", "InternVL3_5-14B") # Video format: 'openai' (data URL), 'vllm' (try vLLM format), 'skip', or 'error' VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "openai") # NEW: Endpoint type detection ENDPOINT_TYPE = os.getenv("ENDPOINT_TYPE", "auto") # 'openai', 'ollama', or 'auto' def detect_endpoint_type(endpoint_url): """Auto-detect if endpoint is OpenAI-compatible or Ollama native""" if "/v1/chat/completions" in endpoint_url: return "openai" elif "/api/generate" in endpoint_url or "/api/chat" in endpoint_url: return "ollama" elif "localhost:11434" in endpoint_url or "ollama" in endpoint_url.lower(): return "openai" # Assume OpenAI-compatible for Ollama else: return "openai" # Default to OpenAI format def convert_gemini_to_openai(gemini_request): """Convert Gemini API format to OpenAI API format""" try: contents = gemini_request.get("contents", []) messages = [] media_info = {"images": [], "videos": []} for content in contents: parts = content.get("parts", []) message_content = [] for part in parts: # Handle text parts if "text" in part: message_content.append({"type": "text", "text": part["text"]}) # Handle inline_data (images/video) elif "inline_data" in part: inline = part["inline_data"] mime_type = inline.get("mime_type", "") data = inline.get("data", "") if mime_type.startswith("image/"): # Images: Universally supported across all runners media_info["images"].append(mime_type) print(f"šŸ–¼ļø Adding image: {mime_type}") message_content.append( { "type": "image_url", "image_url": { "url": f"data:{mime_type};base64,{data}", "detail": "auto", }, } ) elif mime_type.startswith("video/"): # Videos: Format depends on VIDEO_FORMAT setting if VIDEO_FORMAT == "skip": media_info["videos"].append(f"skipped ({mime_type})") print(f"ā­ļø Skipping video: {mime_type} (VIDEO_FORMAT=skip)") message_content.append( { "type": "text", "text": f"[Video content ({mime_type}) was present but skipped]", } ) elif VIDEO_FORMAT == "error": raise ValueError( f"Video content detected ({mime_type}) but VIDEO_FORMAT=error" ) else: # 'openai', 'vllm', or any other value media_info["videos"].append( f"format: {VIDEO_FORMAT} ({mime_type})" ) print( f"šŸ“¹ Adding video ({VIDEO_FORMAT} format): {mime_type}" ) message_content.append( { "type": "image_url", "image_url": { "url": f"data:{mime_type};base64,{data}", "detail": "auto", }, } ) # Add as user message # If only one content item and it's text, send as string for better compatibility if len(message_content) == 1 and message_content[0].get("type") == "text": messages.append({"role": "user", "content": message_content[0]["text"]}) else: messages.append({"role": "user", "content": message_content}) # Build OpenAI request openai_request = {"model": OPENAI_MODEL, "messages": messages} # Add generation config as OpenAI parameters gen_config = gemini_request.get("generationConfig", {}) if "maxOutputTokens" in gen_config: openai_request["max_tokens"] = gen_config["maxOutputTokens"] if "temperature" in gen_config: openai_request["temperature"] = gen_config["temperature"] # Log media summary if media_info["images"] or media_info["videos"]: print(f"šŸ“Š Media summary:") if media_info["images"]: print( f" Images: {len(media_info['images'])} ({', '.join(media_info['images'])})" ) if media_info["videos"]: print(f" Videos: {', '.join(media_info['videos'])}") return openai_request except Exception as e: print(f"āŒ Error converting request: {e}") raise def convert_gemini_to_ollama(gemini_request): """Convert Gemini API format to Ollama native format""" try: contents = gemini_request.get("contents", []) # Extract text and combine into a single prompt prompt_parts = [] images = [] for content in contents: parts = content.get("parts", []) for part in parts: if "text" in part: prompt_parts.append(part["text"]) elif "inline_data" in part: inline = part["inline_data"] mime_type = inline.get("mime_type", "") data = inline.get("data", "") if mime_type.startswith("image/") or mime_type.startswith("video/"): # Ollama expects images in a different format images.append(data) # Just the base64 data print(f"šŸ–¼ļø Adding media for Ollama: {mime_type}") # Build Ollama request ollama_request = { "model": OPENAI_MODEL, "prompt": " ".join(prompt_parts), "stream": False, } # Add images if present if images: ollama_request["images"] = images # Add generation config gen_config = gemini_request.get("generationConfig", {}) if "temperature" in gen_config: ollama_request["options"] = {"temperature": gen_config["temperature"]} return ollama_request except Exception as e: print(f"āŒ Error converting to Ollama format: {e}") raise def convert_openai_to_gemini(openai_response): """Convert OpenAI API response to Gemini API format""" try: # Extract the message content choices = openai_response.get("choices", []) if not choices: print(f"āŒ No choices in OpenAI response: {openai_response}") return {"error": "No response generated"} message = choices[0].get("message", {}) content = message.get("content", "") if not content: print(f"āŒ No content in message: {message}") return {"error": "No response generated"} # Convert to Gemini format gemini_response = { "candidates": [ { "content": {"parts": [{"text": content}], "role": "model"}, "finishReason": "STOP", "index": 0, } ], "usageMetadata": { "promptTokenCount": openai_response.get("usage", {}).get( "prompt_tokens", 0 ), "candidatesTokenCount": openai_response.get("usage", {}).get( "completion_tokens", 0 ), "totalTokenCount": openai_response.get("usage", {}).get( "total_tokens", 0 ), }, } return gemini_response except Exception as e: print(f"āŒ Error converting OpenAI response: {e}") raise def convert_ollama_to_gemini(ollama_response): """Convert Ollama native response to Gemini API format""" try: # Ollama /api/generate returns: {"response": "text", "done": true, ...} response_text = ollama_response.get("response", "") if not response_text: print(f"āŒ No response text in Ollama response: {ollama_response}") return {"error": "No response generated"} # Convert to Gemini format gemini_response = { "candidates": [ { "content": {"parts": [{"text": response_text}], "role": "model"}, "finishReason": "STOP", "index": 0, } ], "usageMetadata": { "promptTokenCount": ollama_response.get("prompt_eval_count", 0), "candidatesTokenCount": ollama_response.get("eval_count", 0), "totalTokenCount": ollama_response.get("prompt_eval_count", 0) + ollama_response.get("eval_count", 0), }, } return gemini_response except Exception as e: print(f"āŒ Error converting Ollama response: {e}") raise HTML_TEMPLATE = """ POST Request Monitor

šŸ“¬ POST Request Monitor & AI Proxy

Configuration:
Endpoint: {{ endpoint }}
Type: {{ endpoint_type }}
Model: {{ model }}
Video Format: {{ video_format }}

Send POST requests to http://localhost:5005/webhook

{% for req in requests %}
{{ req.method }} {{ req.timestamp }} {% if req.forwarded %} FORWARDED ({{ req.endpoint_type }}) {% endif %} {% if req.error %} ERROR {% endif %}
Path: {{ req.path }}
{% if req.query_params %}
Query Parameters:
{{ req.query_params }}
{% endif %} {% if req.body %}
Incoming Body (Gemini Format):
{{ req.body }}
{% endif %} {% if req.converted_request %}
Converted Request:
{{ req.converted_request }}
{% endif %} {% if req.raw_response %}
Raw Response:
{{ req.raw_response }}
{% endif %} {% if req.response %}
Final Response (Gemini Format):
{{ req.response }}
{% endif %} {% if req.error %}
Error:
{{ req.error }}
{% endif %}
{% endfor %}
""" @app.route("/") def index(): endpoint_type = ( ENDPOINT_TYPE if ENDPOINT_TYPE != "auto" else detect_endpoint_type(OPENAI_ENDPOINT) ) return render_template_string( HTML_TEMPLATE, requests=reversed(recent_requests), endpoint=OPENAI_ENDPOINT, endpoint_type=endpoint_type, model=OPENAI_MODEL, video_format=VIDEO_FORMAT, ) @app.route("/webhook", methods=["POST", "PUT", "PATCH"], defaults={"subpath": ""}) @app.route("/webhook/", methods=["POST", "PUT", "PATCH"]) def webhook(subpath): """Accept POST/PUT/PATCH requests, forward to AI endpoint, and return response""" timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") # Get full path with query parameters full_path = request.full_path if request.query_string else request.path print(f"\n{'='*60}") print(f"[{timestamp}] INCOMING {request.method} {full_path}") print(f"Matched route with subpath: '{subpath}'") print(f"{'='*60}") # Detect endpoint type endpoint_type = ( ENDPOINT_TYPE if ENDPOINT_TYPE != "auto" else detect_endpoint_type(OPENAI_ENDPOINT) ) print(f"Detected endpoint type: {endpoint_type}") # Get request data try: body = request.get_data(as_text=True) gemini_request = request.get_json() if request.is_json else {} body_display = json.dumps(gemini_request, indent=2) if gemini_request else body except Exception as e: body_display = str(request.get_data()) gemini_request = {} # Print request details if request.args: print("Query Parameters:") for key, value in request.args.items(): print(f" {key}: {value}") print( f"Body preview:\n{body_display[:500]}{'...' if len(body_display) > 500 else ''}" ) # Store request info for monitoring req_info = { "timestamp": timestamp, "method": request.method, "path": full_path, "query_params": dict(request.args), "body": body_display, "forwarded": False, "response": None, "error": None, "endpoint_type": endpoint_type, "converted_request": None, "raw_response": None, } # Try to forward to AI endpoint try: if gemini_request: print(f"\n{'='*60}") print(f"CONVERTING AND FORWARDING TO {endpoint_type.upper()} ENDPOINT") print(f"Target: {OPENAI_ENDPOINT}") print(f"{'='*60}") # Convert based on endpoint type if endpoint_type == "ollama": converted_request = convert_gemini_to_ollama(gemini_request) else: # openai converted_request = convert_gemini_to_openai(gemini_request) # Log the converted request (truncate base64 for readability) logged_request = json.loads(json.dumps(converted_request)) if endpoint_type == "openai": for msg in logged_request.get("messages", []): if isinstance(msg.get("content"), list): for item in msg["content"]: if item.get("type") == "image_url": url = item["image_url"]["url"] if len(url) > 100: item["image_url"]["url"] = ( url[:50] + "...[base64 data]..." + url[-20:] ) elif "images" in logged_request: # Truncate Ollama images for i, img in enumerate(logged_request["images"]): if len(img) > 100: logged_request["images"][i] = ( img[:50] + "...[base64 data]..." + img[-20:] ) print(f"Converted request:\n{json.dumps(logged_request, indent=2)}") # Forward to endpoint headers = { "Content-Type": "application/json", } if ( OPENAI_API_KEY and OPENAI_API_KEY != "none" and endpoint_type == "openai" ): headers["Authorization"] = f"Bearer {OPENAI_API_KEY}" print(f"Sending request to {OPENAI_ENDPOINT}...") response = requests.post( OPENAI_ENDPOINT, json=converted_request, headers=headers, timeout=120 ) print(f"\nResponse Status: {response.status_code}") print(f"Response Headers: {dict(response.headers)}") if response.status_code == 200: raw_response = response.json() print(f"Raw Response:\n{json.dumps(raw_response, indent=2)[:1000]}...") # Convert back to Gemini format based on endpoint type if endpoint_type == "ollama": gemini_response = convert_ollama_to_gemini(raw_response) else: # openai gemini_response = convert_openai_to_gemini(raw_response) print( f"\nConverted Gemini Response:\n{json.dumps(gemini_response, indent=2)[:1000]}..." ) req_info["forwarded"] = True req_info["response"] = json.dumps(gemini_response, indent=2) req_info["converted_request"] = json.dumps(logged_request, indent=2) req_info["raw_response"] = json.dumps(raw_response, indent=2)[:2000] + ( "..." if len(json.dumps(raw_response, indent=2)) > 2000 else "" ) recent_requests.append(req_info) if len(recent_requests) > MAX_REQUESTS: recent_requests.pop(0) print(f"{'='*60}\n") return jsonify(gemini_response), 200 else: # Get detailed error try: error_data = response.json() error_msg = json.dumps(error_data, indent=2) except: error_msg = response.text full_error = f"{endpoint_type.upper()} endpoint returned {response.status_code}:\n{error_msg}" print(f"ERROR: {full_error}") req_info["error"] = full_error req_info["forwarded"] = True req_info["converted_request"] = json.dumps(logged_request, indent=2) req_info["raw_response"] = error_msg recent_requests.append(req_info) if len(recent_requests) > MAX_REQUESTS: recent_requests.pop(0) print(f"{'='*60}\n") return ( jsonify( { "error": { "message": error_msg, "status": response.status_code, } } ), response.status_code, ) else: # No JSON body, just acknowledge req_info["error"] = "No JSON body to forward" recent_requests.append(req_info) if len(recent_requests) > MAX_REQUESTS: recent_requests.pop(0) print(f"{'='*60}\n") return ( jsonify( { "status": "success", "message": "Request received but not forwarded (no JSON body)", "timestamp": timestamp, } ), 200, ) except Exception as e: error_msg = f"Error processing request: {str(e)}" print(f"ERROR: {error_msg}") import traceback traceback.print_exc() req_info["error"] = error_msg recent_requests.append(req_info) if len(recent_requests) > MAX_REQUESTS: recent_requests.pop(0) print(f"{'='*60}\n") return jsonify({"error": {"message": error_msg}}), 500 @app.route("/clear", methods=["POST"]) def clear(): """Clear all stored requests""" recent_requests.clear() return jsonify({"status": "cleared"}), 200 @app.errorhandler(404) def not_found(e): """Handle 404 errors with helpful message""" print(f"\nāŒ 404 ERROR: {request.method} {request.path}") print(f" Query string: {request.query_string.decode()}") print(f" Full path: {request.full_path}") print(f" Available routes:") for rule in app.url_map.iter_rules(): print(f" - {rule.methods} {rule.rule}") return ( jsonify( { "error": "Not Found", "message": f"The path {request.path} was not found", "hint": "POST requests should go to /webhook or /webhook/", } ), 404, ) if __name__ == "__main__": print("šŸš€ POST Request Monitor & AI Proxy starting...") print("šŸ“ Web UI: http://localhost:5005") print("šŸ“® Webhook endpoint: http://localhost:5005/webhook") print( "šŸ“® Example: http://localhost:5005/webhook/models/model:generateContent?key=none" ) print(f"šŸ”— Forwarding to: {OPENAI_ENDPOINT}") print(f"šŸ¤– Model: {OPENAI_MODEL}") print(f"šŸ“¹ Video format: {VIDEO_FORMAT}") endpoint_type = ( ENDPOINT_TYPE if ENDPOINT_TYPE != "auto" else detect_endpoint_type(OPENAI_ENDPOINT) ) print(f"šŸ”§ Endpoint type: {endpoint_type}") print("\n" + "=" * 60) print("CONFIGURATION OPTIONS:") print("Set these environment variables to configure:") print(" OPENAI_ENDPOINT - Target endpoint URL") print(" ENDPOINT_TYPE - 'openai', 'ollama', or 'auto' (default)") print(" OPENAI_MODEL - Model name") print(" VIDEO_FORMAT - 'openai', 'vllm', 'skip', or 'error'") print("\nFor Ollama:") print(" OpenAI-compatible: http://localhost:11434/v1/chat/completions") print(" Native format: http://localhost:11434/api/generate") print("=" * 60) app.run(host="0.0.0.0", port=5000, debug=True)