import json
import os
import traceback
from datetime import datetime

import requests
from flask import Flask, request, jsonify, render_template_string

app = Flask(__name__)

# Store recent requests in memory
recent_requests = []
MAX_REQUESTS = 50
# OpenAI endpoint configuration
OPENAI_ENDPOINT = os.getenv(
"OPENAI_ENDPOINT", "http://localhost:11434/v1/chat/completions"
)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "none")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "InternVL3_5-14B")
# Video format: 'openai' (data URL), 'vllm' (currently sent the same way, as a data URL), 'skip', or 'error'
VIDEO_FORMAT = os.getenv("VIDEO_FORMAT", "openai")
# NEW: Endpoint type detection
ENDPOINT_TYPE = os.getenv("ENDPOINT_TYPE", "auto") # 'openai', 'ollama', or 'auto'
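
# Example invocation (a sketch; the endpoint and model below are just this
# file's defaults -- substitute whatever your local runner actually serves,
# and the script filename is assumed):
#
#   OPENAI_ENDPOINT=http://localhost:11434/v1/chat/completions \
#   OPENAI_MODEL=InternVL3_5-14B ENDPOINT_TYPE=auto \
#   python monitor.py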


def detect_endpoint_type(endpoint_url):
"""Auto-detect if endpoint is OpenAI-compatible or Ollama native"""
if "/v1/chat/completions" in endpoint_url:
return "openai"
elif "/api/generate" in endpoint_url or "/api/chat" in endpoint_url:
return "ollama"
elif "localhost:11434" in endpoint_url or "ollama" in endpoint_url.lower():
return "openai" # Assume OpenAI-compatible for Ollama
else:
return "openai" # Default to OpenAI format


def convert_gemini_to_openai(gemini_request):
"""Convert Gemini API format to OpenAI API format"""
try:
contents = gemini_request.get("contents", [])
messages = []
media_info = {"images": [], "videos": []}
for content in contents:
parts = content.get("parts", [])
message_content = []
for part in parts:
# Handle text parts
if "text" in part:
message_content.append({"type": "text", "text": part["text"]})
# Handle inline_data (images/video)
elif "inline_data" in part:
inline = part["inline_data"]
mime_type = inline.get("mime_type", "")
data = inline.get("data", "")
if mime_type.startswith("image/"):
# Images: Universally supported across all runners
media_info["images"].append(mime_type)
print(f"š¼ļø Adding image: {mime_type}")
message_content.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{mime_type};base64,{data}",
"detail": "auto",
},
}
)
elif mime_type.startswith("video/"):
# Videos: Format depends on VIDEO_FORMAT setting
if VIDEO_FORMAT == "skip":
media_info["videos"].append(f"skipped ({mime_type})")
print(f"āļø Skipping video: {mime_type} (VIDEO_FORMAT=skip)")
message_content.append(
{
"type": "text",
"text": f"[Video content ({mime_type}) was present but skipped]",
}
)
elif VIDEO_FORMAT == "error":
raise ValueError(
f"Video content detected ({mime_type}) but VIDEO_FORMAT=error"
)
                        else:  # 'openai', 'vllm', or anything else: send as a data-URL image part
media_info["videos"].append(
f"format: {VIDEO_FORMAT} ({mime_type})"
)
                            print(
                                f"📹 Adding video ({VIDEO_FORMAT} format): {mime_type}"
                            )
message_content.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{mime_type};base64,{data}",
"detail": "auto",
},
}
)
            # Add as a user message (any role on the Gemini content is not preserved)
            # If only one part and it's text, send content as a plain string for better compatibility
if len(message_content) == 1 and message_content[0].get("type") == "text":
messages.append({"role": "user", "content": message_content[0]["text"]})
else:
messages.append({"role": "user", "content": message_content})
# Build OpenAI request
openai_request = {"model": OPENAI_MODEL, "messages": messages}
# Add generation config as OpenAI parameters
gen_config = gemini_request.get("generationConfig", {})
if "maxOutputTokens" in gen_config:
openai_request["max_tokens"] = gen_config["maxOutputTokens"]
if "temperature" in gen_config:
openai_request["temperature"] = gen_config["temperature"]
# Log media summary
if media_info["images"] or media_info["videos"]:
print(f"š Media summary:")
if media_info["images"]:
print(
f" Images: {len(media_info['images'])} ({', '.join(media_info['images'])})"
)
if media_info["videos"]:
print(f" Videos: {', '.join(media_info['videos'])}")
return openai_request
except Exception as e:
print(f"ā Error converting request: {e}")
raise
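
# Rough shape of the mapping above (a single text part collapses to a plain
# string; mixed parts become an OpenAI-style content list):
#   {"contents": [{"parts": [
#       {"text": "Describe this"},
#       {"inline_data": {"mime_type": "image/png", "data": "<b64>"}}]}]}
#   -> {"model": OPENAI_MODEL, "messages": [{"role": "user", "content": [
#          {"type": "text", "text": "Describe this"},
#          {"type": "image_url", "image_url": {
#              "url": "data:image/png;base64,<b64>", "detail": "auto"}}]}]}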


def convert_gemini_to_ollama(gemini_request):
"""Convert Gemini API format to Ollama native format"""
try:
contents = gemini_request.get("contents", [])
# Extract text and combine into a single prompt
prompt_parts = []
images = []
for content in contents:
parts = content.get("parts", [])
for part in parts:
if "text" in part:
prompt_parts.append(part["text"])
elif "inline_data" in part:
inline = part["inline_data"]
mime_type = inline.get("mime_type", "")
data = inline.get("data", "")
if mime_type.startswith("image/") or mime_type.startswith("video/"):
# Ollama expects images in a different format
images.append(data) # Just the base64 data
print(f"š¼ļø Adding media for Ollama: {mime_type}")
# Build Ollama request
ollama_request = {
"model": OPENAI_MODEL,
"prompt": " ".join(prompt_parts),
"stream": False,
}
# Add images if present
if images:
ollama_request["images"] = images
# Add generation config
gen_config = gemini_request.get("generationConfig", {})
if "temperature" in gen_config:
ollama_request["options"] = {"temperature": gen_config["temperature"]}
return ollama_request
except Exception as e:
print(f"ā Error converting to Ollama format: {e}")
raise
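
# Rough shape for the native Ollama path: text parts are joined into a single
# prompt string and media becomes a list of bare base64 strings:
#   {"contents": [{"parts": [
#       {"text": "Describe this"},
#       {"inline_data": {"mime_type": "image/png", "data": "<b64>"}}]}]}
#   -> {"model": OPENAI_MODEL, "prompt": "Describe this",
#       "stream": False, "images": ["<b64>"]}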


def convert_openai_to_gemini(openai_response):
"""Convert OpenAI API response to Gemini API format"""
try:
# Extract the message content
choices = openai_response.get("choices", [])
if not choices:
print(f"ā No choices in OpenAI response: {openai_response}")
return {"error": "No response generated"}
message = choices[0].get("message", {})
content = message.get("content", "")
if not content:
print(f"ā No content in message: {message}")
return {"error": "No response generated"}
# Convert to Gemini format
gemini_response = {
"candidates": [
{
"content": {"parts": [{"text": content}], "role": "model"},
"finishReason": "STOP",
"index": 0,
}
],
"usageMetadata": {
"promptTokenCount": openai_response.get("usage", {}).get(
"prompt_tokens", 0
),
"candidatesTokenCount": openai_response.get("usage", {}).get(
"completion_tokens", 0
),
"totalTokenCount": openai_response.get("usage", {}).get(
"total_tokens", 0
),
},
}
return gemini_response
except Exception as e:
print(f"ā Error converting OpenAI response: {e}")
raise


def convert_ollama_to_gemini(ollama_response):
"""Convert Ollama native response to Gemini API format"""
try:
# Ollama /api/generate returns: {"response": "text", "done": true, ...}
response_text = ollama_response.get("response", "")
if not response_text:
print(f"ā No response text in Ollama response: {ollama_response}")
return {"error": "No response generated"}
# Convert to Gemini format
gemini_response = {
"candidates": [
{
"content": {"parts": [{"text": response_text}], "role": "model"},
"finishReason": "STOP",
"index": 0,
}
],
"usageMetadata": {
"promptTokenCount": ollama_response.get("prompt_eval_count", 0),
"candidatesTokenCount": ollama_response.get("eval_count", 0),
"totalTokenCount": ollama_response.get("prompt_eval_count", 0)
+ ollama_response.get("eval_count", 0),
},
}
return gemini_response
except Exception as e:
print(f"ā Error converting Ollama response: {e}")
raise
HTML_TEMPLATE = """
POST Request Monitor
š¬ POST Request Monitor & AI Proxy
Configuration:
Endpoint: {{ endpoint }}
Type: {{ endpoint_type }}
Model: {{ model }}
Video Format: {{ video_format }}
Send POST requests to http://localhost:5005/webhook
{% for req in requests %}
{{ req.method }}
{{ req.timestamp }}
{% if req.forwarded %}
FORWARDED ({{ req.endpoint_type }})
{% endif %}
{% if req.error %}
ERROR
{% endif %}
Path: {{ req.path }}
{% if req.query_params %}
Query Parameters:
{{ req.query_params }}
{% endif %}
{% if req.body %}
Incoming Body (Gemini Format):
{{ req.body }}
{% endif %}
{% if req.converted_request %}
Converted Request:
{{ req.converted_request }}
{% endif %}
{% if req.raw_response %}
Raw Response:
{{ req.raw_response }}
{% endif %}
{% if req.response %}
Final Response (Gemini Format):
{{ req.response }}
{% endif %}
{% if req.error %}
Error:
{{ req.error }}
{% endif %}
{% endfor %}
"""
@app.route("/")
def index():
endpoint_type = (
ENDPOINT_TYPE
if ENDPOINT_TYPE != "auto"
else detect_endpoint_type(OPENAI_ENDPOINT)
)
return render_template_string(
HTML_TEMPLATE,
requests=reversed(recent_requests),
endpoint=OPENAI_ENDPOINT,
endpoint_type=endpoint_type,
model=OPENAI_MODEL,
video_format=VIDEO_FORMAT,
)
@app.route("/webhook", methods=["POST", "PUT", "PATCH"], defaults={"subpath": ""})
@app.route("/webhook/", methods=["POST", "PUT", "PATCH"])
def webhook(subpath):
"""Accept POST/PUT/PATCH requests, forward to AI endpoint, and return response"""
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Get full path with query parameters
full_path = request.full_path if request.query_string else request.path
print(f"\n{'='*60}")
print(f"[{timestamp}] INCOMING {request.method} {full_path}")
print(f"Matched route with subpath: '{subpath}'")
print(f"{'='*60}")
# Detect endpoint type
endpoint_type = (
ENDPOINT_TYPE
if ENDPOINT_TYPE != "auto"
else detect_endpoint_type(OPENAI_ENDPOINT)
)
print(f"Detected endpoint type: {endpoint_type}")
# Get request data
try:
body = request.get_data(as_text=True)
gemini_request = request.get_json() if request.is_json else {}
body_display = json.dumps(gemini_request, indent=2) if gemini_request else body
    except Exception as e:
        print(f"Failed to parse request body: {e}")
        body_display = str(request.get_data())
        gemini_request = {}
# Print request details
if request.args:
print("Query Parameters:")
for key, value in request.args.items():
print(f" {key}: {value}")
print(
f"Body preview:\n{body_display[:500]}{'...' if len(body_display) > 500 else ''}"
)
# Store request info for monitoring
req_info = {
"timestamp": timestamp,
"method": request.method,
"path": full_path,
"query_params": dict(request.args),
"body": body_display,
"forwarded": False,
"response": None,
"error": None,
"endpoint_type": endpoint_type,
"converted_request": None,
"raw_response": None,
}
# Try to forward to AI endpoint
try:
if gemini_request:
print(f"\n{'='*60}")
print(f"CONVERTING AND FORWARDING TO {endpoint_type.upper()} ENDPOINT")
print(f"Target: {OPENAI_ENDPOINT}")
print(f"{'='*60}")
# Convert based on endpoint type
if endpoint_type == "ollama":
converted_request = convert_gemini_to_ollama(gemini_request)
else: # openai
converted_request = convert_gemini_to_openai(gemini_request)
# Log the converted request (truncate base64 for readability)
logged_request = json.loads(json.dumps(converted_request))
if endpoint_type == "openai":
for msg in logged_request.get("messages", []):
if isinstance(msg.get("content"), list):
for item in msg["content"]:
if item.get("type") == "image_url":
url = item["image_url"]["url"]
if len(url) > 100:
item["image_url"]["url"] = (
url[:50] + "...[base64 data]..." + url[-20:]
)
elif "images" in logged_request:
# Truncate Ollama images
for i, img in enumerate(logged_request["images"]):
if len(img) > 100:
logged_request["images"][i] = (
img[:50] + "...[base64 data]..." + img[-20:]
)
print(f"Converted request:\n{json.dumps(logged_request, indent=2)}")
# Forward to endpoint
headers = {
"Content-Type": "application/json",
}
if (
OPENAI_API_KEY
and OPENAI_API_KEY != "none"
and endpoint_type == "openai"
):
headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"
print(f"Sending request to {OPENAI_ENDPOINT}...")
response = requests.post(
OPENAI_ENDPOINT, json=converted_request, headers=headers, timeout=120
)
print(f"\nResponse Status: {response.status_code}")
print(f"Response Headers: {dict(response.headers)}")
if response.status_code == 200:
raw_response = response.json()
print(f"Raw Response:\n{json.dumps(raw_response, indent=2)[:1000]}...")
# Convert back to Gemini format based on endpoint type
if endpoint_type == "ollama":
gemini_response = convert_ollama_to_gemini(raw_response)
else: # openai
gemini_response = convert_openai_to_gemini(raw_response)
print(
f"\nConverted Gemini Response:\n{json.dumps(gemini_response, indent=2)[:1000]}..."
)
req_info["forwarded"] = True
req_info["response"] = json.dumps(gemini_response, indent=2)
req_info["converted_request"] = json.dumps(logged_request, indent=2)
req_info["raw_response"] = json.dumps(raw_response, indent=2)[:2000] + (
"..." if len(json.dumps(raw_response, indent=2)) > 2000 else ""
)
recent_requests.append(req_info)
if len(recent_requests) > MAX_REQUESTS:
recent_requests.pop(0)
print(f"{'='*60}\n")
return jsonify(gemini_response), 200
else:
# Get detailed error
try:
error_data = response.json()
error_msg = json.dumps(error_data, indent=2)
                except ValueError:  # error body was not JSON
error_msg = response.text
full_error = f"{endpoint_type.upper()} endpoint returned {response.status_code}:\n{error_msg}"
print(f"ERROR: {full_error}")
req_info["error"] = full_error
req_info["forwarded"] = True
req_info["converted_request"] = json.dumps(logged_request, indent=2)
req_info["raw_response"] = error_msg
recent_requests.append(req_info)
if len(recent_requests) > MAX_REQUESTS:
recent_requests.pop(0)
print(f"{'='*60}\n")
return (
jsonify(
{
"error": {
"message": error_msg,
"status": response.status_code,
}
}
),
response.status_code,
)
else:
# No JSON body, just acknowledge
req_info["error"] = "No JSON body to forward"
recent_requests.append(req_info)
if len(recent_requests) > MAX_REQUESTS:
recent_requests.pop(0)
print(f"{'='*60}\n")
return (
jsonify(
{
"status": "success",
"message": "Request received but not forwarded (no JSON body)",
"timestamp": timestamp,
}
),
200,
)
except Exception as e:
error_msg = f"Error processing request: {str(e)}"
print(f"ERROR: {error_msg}")
        traceback.print_exc()
req_info["error"] = error_msg
recent_requests.append(req_info)
if len(recent_requests) > MAX_REQUESTS:
recent_requests.pop(0)
print(f"{'='*60}\n")
return jsonify({"error": {"message": error_msg}}), 500
@app.route("/clear", methods=["POST"])
def clear():
"""Clear all stored requests"""
recent_requests.clear()
return jsonify({"status": "cleared"}), 200


@app.errorhandler(404)
def not_found(e):
"""Handle 404 errors with helpful message"""
print(f"\nā 404 ERROR: {request.method} {request.path}")
print(f" Query string: {request.query_string.decode()}")
print(f" Full path: {request.full_path}")
print(f" Available routes:")
for rule in app.url_map.iter_rules():
print(f" - {rule.methods} {rule.rule}")
return (
jsonify(
{
"error": "Not Found",
"message": f"The path {request.path} was not found",
"hint": "POST requests should go to /webhook or /webhook/",
}
),
404,
)
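

# Quick manual test once the server is running (mirrors the Gemini
# generateContent body shape this proxy expects; the key query parameter
# is logged but not used):
#
#   curl -X POST "http://localhost:5005/webhook/models/model:generateContent?key=none" \
#     -H "Content-Type: application/json" \
#     -d '{"contents": [{"parts": [{"text": "Hello"}]}]}'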
if __name__ == "__main__":
print("š POST Request Monitor & AI Proxy starting...")
print("š Web UI: http://localhost:5005")
print("š® Webhook endpoint: http://localhost:5005/webhook")
print(
"š® Example: http://localhost:5005/webhook/models/model:generateContent?key=none"
)
print(f"š Forwarding to: {OPENAI_ENDPOINT}")
print(f"š¤ Model: {OPENAI_MODEL}")
print(f"š¹ Video format: {VIDEO_FORMAT}")
endpoint_type = (
ENDPOINT_TYPE
if ENDPOINT_TYPE != "auto"
else detect_endpoint_type(OPENAI_ENDPOINT)
)
print(f"š§ Endpoint type: {endpoint_type}")
print("\n" + "=" * 60)
print("CONFIGURATION OPTIONS:")
print("Set these environment variables to configure:")
print(" OPENAI_ENDPOINT - Target endpoint URL")
print(" ENDPOINT_TYPE - 'openai', 'ollama', or 'auto' (default)")
print(" OPENAI_MODEL - Model name")
print(" VIDEO_FORMAT - 'openai', 'vllm', 'skip', or 'error'")
print("\nFor Ollama:")
print(" OpenAI-compatible: http://localhost:11434/v1/chat/completions")
print(" Native format: http://localhost:11434/api/generate")
print("=" * 60)
    app.run(host="0.0.0.0", port=5005, debug=True)