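"""Gemini-to-OpenAI/Ollama proxy with a request-monitoring web UI.

Accepts Gemini-format generateContent requests on /webhook, converts them to
either the OpenAI chat-completions format or Ollama's native format, forwards
them to the configured endpoint, converts the response back to Gemini format,
and keeps a rolling log of recent traffic viewable at the root URL.
"""
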
from flask import Flask, request, jsonify, render_template_string
from datetime import datetime
import json
import os
import requests

app = Flask(__name__)

# Store recent requests in memory
recent_requests = []
MAX_REQUESTS = 50

# OpenAI endpoint configuration
OPENAI_ENDPOINT = os.getenv(
    "OPENAI_ENDPOINT", "http://localhost:11434/v1/chat/completions"
)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "none")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "InternVL3_5-14B")

# Video format: 'openai' (data URL), 'vllm' (try vLLM format), 'skip', or 'error'
VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "openai")

# Endpoint type: 'openai', 'ollama', or 'auto' (detected from the URL when 'auto')
ENDPOINT_TYPE = os.getenv("ENDPOINT_TYPE", "auto")


def detect_endpoint_type(endpoint_url):
    """Auto-detect whether an endpoint is OpenAI-compatible or Ollama native."""
    if "/v1/chat/completions" in endpoint_url:
        return "openai"
    elif "/api/generate" in endpoint_url or "/api/chat" in endpoint_url:
        return "ollama"
    elif "localhost:11434" in endpoint_url or "ollama" in endpoint_url.lower():
        return "openai"  # Ollama also serves an OpenAI-compatible API; assume that
    else:
        return "openai"  # Default to OpenAI format
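
# A quick illustration of the detection rules above (illustrative URLs only):
#   detect_endpoint_type("http://localhost:11434/v1/chat/completions")  -> "openai"
#   detect_endpoint_type("http://localhost:11434/api/generate")         -> "ollama"
#   detect_endpoint_type("https://example.com/ollama")                  -> "openai"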


def convert_gemini_to_openai(gemini_request):
    """Convert Gemini API format to OpenAI API format."""
    try:
        contents = gemini_request.get("contents", [])
        messages = []
        media_info = {"images": [], "videos": []}

        for content in contents:
            parts = content.get("parts", [])
            message_content = []

            for part in parts:
                # Handle text parts
                if "text" in part:
                    message_content.append({"type": "text", "text": part["text"]})
                # Handle inline_data (images/video)
                elif "inline_data" in part:
                    inline = part["inline_data"]
                    mime_type = inline.get("mime_type", "")
                    data = inline.get("data", "")

                    if mime_type.startswith("image/"):
                        # Images: universally supported across runners
                        media_info["images"].append(mime_type)
                        print(f"🖼️ Adding image: {mime_type}")
                        message_content.append(
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{data}",
                                    "detail": "auto",
                                },
                            }
                        )
                    elif mime_type.startswith("video/"):
                        # Videos: format depends on the VIDEO_FORMAT setting
                        if VIDEO_FORMAT == "skip":
                            media_info["videos"].append(f"skipped ({mime_type})")
                            print(f"⏭️ Skipping video: {mime_type} (VIDEO_FORMAT=skip)")
                            message_content.append(
                                {
                                    "type": "text",
                                    "text": f"[Video content ({mime_type}) was present but skipped]",
                                }
                            )
                        elif VIDEO_FORMAT == "error":
                            raise ValueError(
                                f"Video content detected ({mime_type}) but VIDEO_FORMAT=error"
                            )
                        else:  # 'openai', 'vllm', or any other value
                            media_info["videos"].append(
                                f"format: {VIDEO_FORMAT} ({mime_type})"
                            )
                            print(f"📹 Adding video ({VIDEO_FORMAT} format): {mime_type}")
                            message_content.append(
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{mime_type};base64,{data}",
                                        "detail": "auto",
                                    },
                                }
                            )

            # Add as a user message. If the only part is text, send it as a
            # plain string for better compatibility.
            if len(message_content) == 1 and message_content[0].get("type") == "text":
                messages.append({"role": "user", "content": message_content[0]["text"]})
            else:
                messages.append({"role": "user", "content": message_content})

        # Build the OpenAI request
        openai_request = {"model": OPENAI_MODEL, "messages": messages}

        # Map generation config to OpenAI parameters
        gen_config = gemini_request.get("generationConfig", {})
        if "maxOutputTokens" in gen_config:
            openai_request["max_tokens"] = gen_config["maxOutputTokens"]
        if "temperature" in gen_config:
            openai_request["temperature"] = gen_config["temperature"]

        # Log a media summary
        if media_info["images"] or media_info["videos"]:
            print("📊 Media summary:")
            if media_info["images"]:
                print(
                    f"  Images: {len(media_info['images'])} ({', '.join(media_info['images'])})"
                )
            if media_info["videos"]:
                print(f"  Videos: {', '.join(media_info['videos'])}")

        return openai_request
    except Exception as e:
        print(f"❌ Error converting request: {e}")
        raise
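
# Illustrative mapping (hypothetical payload): a Gemini request like
#   {"contents": [{"parts": [
#        {"text": "Describe this image"},
#        {"inline_data": {"mime_type": "image/png", "data": "<base64>"}}]}],
#    "generationConfig": {"temperature": 0.2, "maxOutputTokens": 256}}
# becomes the OpenAI-style request
#   {"model": OPENAI_MODEL,
#    "messages": [{"role": "user", "content": [
#        {"type": "text", "text": "Describe this image"},
#        {"type": "image_url",
#         "image_url": {"url": "data:image/png;base64,<base64>", "detail": "auto"}}]}],
#    "temperature": 0.2, "max_tokens": 256}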


def convert_gemini_to_ollama(gemini_request):
    """Convert Gemini API format to Ollama native format."""
    try:
        contents = gemini_request.get("contents", [])

        # Extract text parts and combine them into a single prompt
        prompt_parts = []
        images = []

        for content in contents:
            parts = content.get("parts", [])
            for part in parts:
                if "text" in part:
                    prompt_parts.append(part["text"])
                elif "inline_data" in part:
                    inline = part["inline_data"]
                    mime_type = inline.get("mime_type", "")
                    data = inline.get("data", "")
                    if mime_type.startswith("image/") or mime_type.startswith("video/"):
                        # Ollama's native API takes bare base64 strings in an
                        # "images" array, not data URLs
                        images.append(data)
                        print(f"🖼️ Adding media for Ollama: {mime_type}")

        # Build the Ollama request
        ollama_request = {
            "model": OPENAI_MODEL,
            "prompt": " ".join(prompt_parts),
            "stream": False,
        }

        # Add images if present
        if images:
            ollama_request["images"] = images

        # Add generation config
        gen_config = gemini_request.get("generationConfig", {})
        if "temperature" in gen_config:
            ollama_request["options"] = {"temperature": gen_config["temperature"]}

        return ollama_request
    except Exception as e:
        print(f"❌ Error converting to Ollama format: {e}")
        raise
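
# Illustrative result (same hypothetical payload as above):
#   {"model": OPENAI_MODEL, "prompt": "Describe this image", "stream": False,
#    "images": ["<base64>"], "options": {"temperature": 0.2}}
# Note that maxOutputTokens is not mapped here; only temperature is passed through.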


def convert_openai_to_gemini(openai_response):
    """Convert an OpenAI API response to Gemini API format."""
    try:
        # Extract the message content
        choices = openai_response.get("choices", [])
        if not choices:
            print(f"❌ No choices in OpenAI response: {openai_response}")
            return {"error": "No response generated"}

        message = choices[0].get("message", {})
        content = message.get("content", "")
        if not content:
            print(f"❌ No content in message: {message}")
            return {"error": "No response generated"}

        # Convert to Gemini format
        usage = openai_response.get("usage", {})
        gemini_response = {
            "candidates": [
                {
                    "content": {"parts": [{"text": content}], "role": "model"},
                    "finishReason": "STOP",
                    "index": 0,
                }
            ],
            "usageMetadata": {
                "promptTokenCount": usage.get("prompt_tokens", 0),
                "candidatesTokenCount": usage.get("completion_tokens", 0),
                "totalTokenCount": usage.get("total_tokens", 0),
            },
        }
        return gemini_response
    except Exception as e:
        print(f"❌ Error converting OpenAI response: {e}")
        raise


def convert_ollama_to_gemini(ollama_response):
    """Convert an Ollama native response to Gemini API format."""
    try:
        # Ollama's /api/generate returns: {"response": "text", "done": true, ...}
        response_text = ollama_response.get("response", "")
        if not response_text:
            print(f"❌ No response text in Ollama response: {ollama_response}")
            return {"error": "No response generated"}

        # Convert to Gemini format
        prompt_tokens = ollama_response.get("prompt_eval_count", 0)
        output_tokens = ollama_response.get("eval_count", 0)
        gemini_response = {
            "candidates": [
                {
                    "content": {"parts": [{"text": response_text}], "role": "model"},
                    "finishReason": "STOP",
                    "index": 0,
                }
            ],
            "usageMetadata": {
                "promptTokenCount": prompt_tokens,
                "candidatesTokenCount": output_tokens,
                "totalTokenCount": prompt_tokens + output_tokens,
            },
        }
        return gemini_response
    except Exception as e:
        print(f"❌ Error converting Ollama response: {e}")
        raise
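
# Illustrative mapping (hypothetical values): an Ollama response like
#   {"response": "A cat on a couch.", "done": true,
#    "prompt_eval_count": 12, "eval_count": 7}
# becomes
#   {"candidates": [{"content": {"parts": [{"text": "A cat on a couch."}],
#                                "role": "model"},
#                    "finishReason": "STOP", "index": 0}],
#    "usageMetadata": {"promptTokenCount": 12, "candidatesTokenCount": 7,
#                      "totalTokenCount": 19}}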


HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>POST Request Monitor</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }
        h1 { color: #333; }
        .config { background: #e3f2fd; padding: 10px; margin: 10px 0; border-radius: 5px; }
        .request {
            background: white;
            padding: 15px;
            margin: 10px 0;
            border-radius: 5px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .timestamp { color: #666; font-size: 0.9em; }
        .method {
            display: inline-block;
            padding: 3px 8px;
            background: #4CAF50;
            color: white;
            border-radius: 3px;
            font-weight: bold;
        }
        .forwarded {
            display: inline-block;
            padding: 3px 8px;
            background: #2196F3;
            color: white;
            border-radius: 3px;
            font-size: 0.8em;
            margin-left: 10px;
        }
        .error-badge {
            display: inline-block;
            padding: 3px 8px;
            background: #f44336;
            color: white;
            border-radius: 3px;
            font-size: 0.8em;
            margin-left: 10px;
        }
        pre {
            background: #f4f4f4;
            padding: 10px;
            border-radius: 3px;
            overflow-x: auto;
            max-height: 300px;
            overflow-y: auto;
        }
        .clear-btn {
            background: #f44336;
            color: white;
            border: none;
            padding: 10px 20px;
            border-radius: 5px;
            cursor: pointer;
            margin: 10px 0;
        }
        .clear-btn:hover { background: #d32f2f; }
    </style>
    <script>
        function clearRequests() {
            fetch('/clear', { method: 'POST' })
                .then(() => location.reload());
        }
        // Auto-refresh every 3 seconds
        setTimeout(() => location.reload(), 3000);
    </script>
</head>
<body>
    <h1>📬 POST Request Monitor & AI Proxy</h1>
    <div class="config">
        <strong>Configuration:</strong><br>
        Endpoint: <strong>{{ endpoint }}</strong><br>
        Type: <strong>{{ endpoint_type }}</strong><br>
        Model: <strong>{{ model }}</strong><br>
        Video Format: <strong>{{ video_format }}</strong>
    </div>
    <p>Send POST requests to <strong>http://localhost:5005/webhook</strong></p>
    <button class="clear-btn" onclick="clearRequests()">Clear All</button>
    <div id="requests">
        {% for req in requests %}
        <div class="request">
            <div>
                <span class="method">{{ req.method }}</span>
                <span class="timestamp">{{ req.timestamp }}</span>
                {% if req.forwarded %}
                <span class="forwarded">FORWARDED ({{ req.endpoint_type }})</span>
                {% endif %}
                {% if req.error %}
                <span class="error-badge">ERROR</span>
                {% endif %}
            </div>
            <div><strong>Path:</strong> {{ req.path }}</div>
            {% if req.query_params %}
            <div><strong>Query Parameters:</strong></div>
            <pre>{{ req.query_params }}</pre>
            {% endif %}
            {% if req.body %}
            <div><strong>Incoming Body (Gemini Format):</strong></div>
            <pre>{{ req.body }}</pre>
            {% endif %}
            {% if req.converted_request %}
            <div><strong>Converted Request:</strong></div>
            <pre>{{ req.converted_request }}</pre>
            {% endif %}
            {% if req.raw_response %}
            <div><strong>Raw Response:</strong></div>
            <pre>{{ req.raw_response }}</pre>
            {% endif %}
            {% if req.response %}
            <div><strong>Final Response (Gemini Format):</strong></div>
            <pre>{{ req.response }}</pre>
            {% endif %}
            {% if req.error %}
            <div><strong>Error:</strong></div>
            <pre style="color: red;">{{ req.error }}</pre>
            {% endif %}
        </div>
        {% endfor %}
    </div>
</body>
</html>
"""


@app.route("/")
def index():
    endpoint_type = (
        ENDPOINT_TYPE
        if ENDPOINT_TYPE != "auto"
        else detect_endpoint_type(OPENAI_ENDPOINT)
    )
    return render_template_string(
        HTML_TEMPLATE,
        requests=reversed(recent_requests),
        endpoint=OPENAI_ENDPOINT,
        endpoint_type=endpoint_type,
        model=OPENAI_MODEL,
        video_format=VIDEO_FORMAT,
    )


@app.route("/webhook", methods=["POST", "PUT", "PATCH"], defaults={"subpath": ""})
@app.route("/webhook/<path:subpath>", methods=["POST", "PUT", "PATCH"])
def webhook(subpath):
    """Accept POST/PUT/PATCH requests, forward them to the AI endpoint, and return the response."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Get the full path including query parameters
    full_path = request.full_path if request.query_string else request.path

    print(f"\n{'='*60}")
    print(f"[{timestamp}] INCOMING {request.method} {full_path}")
    print(f"Matched route with subpath: '{subpath}'")
    print(f"{'='*60}")

    # Detect endpoint type
    endpoint_type = (
        ENDPOINT_TYPE
        if ENDPOINT_TYPE != "auto"
        else detect_endpoint_type(OPENAI_ENDPOINT)
    )
    print(f"Detected endpoint type: {endpoint_type}")

    # Get request data
    try:
        body = request.get_data(as_text=True)
        gemini_request = request.get_json() if request.is_json else {}
        body_display = json.dumps(gemini_request, indent=2) if gemini_request else body
    except Exception:
        body_display = str(request.get_data())
        gemini_request = {}

    # Print request details
    if request.args:
        print("Query Parameters:")
        for key, value in request.args.items():
            print(f"  {key}: {value}")
    print(
        f"Body preview:\n{body_display[:500]}{'...' if len(body_display) > 500 else ''}"
    )

    # Store request info for monitoring
    req_info = {
        "timestamp": timestamp,
        "method": request.method,
        "path": full_path,
        "query_params": dict(request.args),
        "body": body_display,
        "forwarded": False,
        "response": None,
        "error": None,
        "endpoint_type": endpoint_type,
        "converted_request": None,
        "raw_response": None,
    }

    # Try to forward to the AI endpoint
    try:
        if gemini_request:
            print(f"\n{'='*60}")
            print(f"CONVERTING AND FORWARDING TO {endpoint_type.upper()} ENDPOINT")
            print(f"Target: {OPENAI_ENDPOINT}")
            print(f"{'='*60}")

            # Convert based on endpoint type
            if endpoint_type == "ollama":
                converted_request = convert_gemini_to_ollama(gemini_request)
            else:  # openai
                converted_request = convert_gemini_to_openai(gemini_request)

            # Log the converted request (truncate base64 for readability)
            logged_request = json.loads(json.dumps(converted_request))
            if endpoint_type == "openai":
                for msg in logged_request.get("messages", []):
                    if isinstance(msg.get("content"), list):
                        for item in msg["content"]:
                            if item.get("type") == "image_url":
                                url = item["image_url"]["url"]
                                if len(url) > 100:
                                    item["image_url"]["url"] = (
                                        url[:50] + "...[base64 data]..." + url[-20:]
                                    )
            elif "images" in logged_request:
                # Truncate Ollama images
                for i, img in enumerate(logged_request["images"]):
                    if len(img) > 100:
                        logged_request["images"][i] = (
                            img[:50] + "...[base64 data]..." + img[-20:]
                        )
            print(f"Converted request:\n{json.dumps(logged_request, indent=2)}")

            # Forward to the endpoint
            headers = {"Content-Type": "application/json"}
            if (
                OPENAI_API_KEY
                and OPENAI_API_KEY != "none"
                and endpoint_type == "openai"
            ):
                headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"

            print(f"Sending request to {OPENAI_ENDPOINT}...")
            response = requests.post(
                OPENAI_ENDPOINT, json=converted_request, headers=headers, timeout=120
            )

            print(f"\nResponse Status: {response.status_code}")
            print(f"Response Headers: {dict(response.headers)}")

            if response.status_code == 200:
                raw_response = response.json()
                print(f"Raw Response:\n{json.dumps(raw_response, indent=2)[:1000]}...")

                # Convert back to Gemini format based on endpoint type
                if endpoint_type == "ollama":
                    gemini_response = convert_ollama_to_gemini(raw_response)
                else:  # openai
                    gemini_response = convert_openai_to_gemini(raw_response)

                print(
                    f"\nConverted Gemini Response:\n{json.dumps(gemini_response, indent=2)[:1000]}..."
                )

                req_info["forwarded"] = True
                req_info["response"] = json.dumps(gemini_response, indent=2)
                req_info["converted_request"] = json.dumps(logged_request, indent=2)
                req_info["raw_response"] = json.dumps(raw_response, indent=2)[:2000] + (
                    "..." if len(json.dumps(raw_response, indent=2)) > 2000 else ""
                )

                recent_requests.append(req_info)
                if len(recent_requests) > MAX_REQUESTS:
                    recent_requests.pop(0)

                print(f"{'='*60}\n")
                return jsonify(gemini_response), 200
            else:
                # Get a detailed error message
                try:
                    error_data = response.json()
                    error_msg = json.dumps(error_data, indent=2)
                except Exception:
                    error_msg = response.text

                full_error = f"{endpoint_type.upper()} endpoint returned {response.status_code}:\n{error_msg}"
                print(f"ERROR: {full_error}")

                req_info["error"] = full_error
                req_info["forwarded"] = True
                req_info["converted_request"] = json.dumps(logged_request, indent=2)
                req_info["raw_response"] = error_msg

                recent_requests.append(req_info)
                if len(recent_requests) > MAX_REQUESTS:
                    recent_requests.pop(0)

                print(f"{'='*60}\n")
                return (
                    jsonify(
                        {
                            "error": {
                                "message": error_msg,
                                "status": response.status_code,
                            }
                        }
                    ),
                    response.status_code,
                )
        else:
            # No JSON body, just acknowledge
            req_info["error"] = "No JSON body to forward"
            recent_requests.append(req_info)
            if len(recent_requests) > MAX_REQUESTS:
                recent_requests.pop(0)
            print(f"{'='*60}\n")
            return (
                jsonify(
                    {
                        "status": "success",
                        "message": "Request received but not forwarded (no JSON body)",
                        "timestamp": timestamp,
                    }
                ),
                200,
            )
    except Exception as e:
        error_msg = f"Error processing request: {str(e)}"
        print(f"ERROR: {error_msg}")
        import traceback

        traceback.print_exc()
        req_info["error"] = error_msg
        recent_requests.append(req_info)
        if len(recent_requests) > MAX_REQUESTS:
            recent_requests.pop(0)
        print(f"{'='*60}\n")
        return jsonify({"error": {"message": error_msg}}), 500


@app.route("/clear", methods=["POST"])
def clear():
    """Clear all stored requests."""
    recent_requests.clear()
    return jsonify({"status": "cleared"}), 200


@app.errorhandler(404)
def not_found(e):
    """Handle 404 errors with a helpful message."""
    print(f"\n❌ 404 ERROR: {request.method} {request.path}")
    print(f"   Query string: {request.query_string.decode()}")
    print(f"   Full path: {request.full_path}")
    print("   Available routes:")
    for rule in app.url_map.iter_rules():
        print(f"     - {rule.methods} {rule.rule}")
    return (
        jsonify(
            {
                "error": "Not Found",
                "message": f"The path {request.path} was not found",
                "hint": "POST requests should go to /webhook or /webhook/<path>",
            }
        ),
        404,
    )


if __name__ == "__main__":
    print("🚀 POST Request Monitor & AI Proxy starting...")
    print("📍 Web UI: http://localhost:5005")
    print("📮 Webhook endpoint: http://localhost:5005/webhook")
    print(
        "📮 Example: http://localhost:5005/webhook/models/model:generateContent?key=none"
    )
    print(f"🔗 Forwarding to: {OPENAI_ENDPOINT}")
    print(f"🤖 Model: {OPENAI_MODEL}")
    print(f"📹 Video format: {VIDEO_FORMAT}")
    endpoint_type = (
        ENDPOINT_TYPE
        if ENDPOINT_TYPE != "auto"
        else detect_endpoint_type(OPENAI_ENDPOINT)
    )
    print(f"🔧 Endpoint type: {endpoint_type}")
    print("\n" + "=" * 60)
    print("CONFIGURATION OPTIONS:")
    print("Set these environment variables to configure:")
    print("  OPENAI_ENDPOINT - Target endpoint URL")
    print("  ENDPOINT_TYPE   - 'openai', 'ollama', or 'auto' (default)")
    print("  OPENAI_MODEL    - Model name")
    print("  VIDEO_FORMAT    - 'openai', 'vllm', 'skip', or 'error'")
    print("\nFor Ollama:")
    print("  OpenAI-compatible: http://localhost:11434/v1/chat/completions")
    print("  Native format:     http://localhost:11434/api/generate")
    print("=" * 60)
    # Serve on 5005 so the URLs printed above and shown in the web UI are accurate
    app.run(host="0.0.0.0", port=5005, debug=True)
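
# Example invocation (illustrative), matching the URL printed in the startup banner:
#
#   curl -X POST "http://localhost:5005/webhook/models/model:generateContent?key=none" \
#        -H "Content-Type: application/json" \
#        -d '{"contents": [{"parts": [{"text": "Say hello"}]}],
#             "generationConfig": {"temperature": 0.2, "maxOutputTokens": 64}}'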