"""
Deployment helper for Qwen models on Vast.ai
"""
import logging
import shlex
import time
from typing import Dict
class ModelDeployer:
    """Helper for deploying Qwen models on Vast.ai.

    Generates vLLM launch commands and prints step-by-step deployment
    instructions. Instruction output goes to stdout via ``print``; the
    logger is available for callers/subclasses that want structured logs.
    """

    def __init__(self):
        # Named (non-root) logger; guard below ensures repeated
        # instantiation does not attach duplicate handlers and
        # double-print every record.
        self.logger = logging.getLogger("ModelDeployer")
        self.logger.setLevel(logging.INFO)

        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

    def generate_deployment_command(self, model_config: Dict) -> str:
        """Build a multi-line vLLM OpenAI API server launch command.

        Args:
            model_config: Mapping with required keys ``name``, ``gpus`` and
                ``port``, and an optional ``quantization`` key (falsy values
                are treated as "no quantization").

        Returns:
            A shell command string joined with backslash-newline
            continuations, suitable for copy-pasting into a terminal.

        Raises:
            KeyError: If a required key is missing from ``model_config``.
        """
        # Quote each interpolated value so model names/values containing
        # shell metacharacters (spaces, quotes, ...) cannot break or
        # inject into the command. Plain tokens pass through unchanged.
        quote = shlex.quote
        cmd_parts = [
            "python -m vllm.entrypoints.openai.api_server",
            f"--model {quote(str(model_config['name']))}",
            f"--tensor-parallel-size {quote(str(model_config['gpus']))}",
            f"--port {quote(str(model_config['port']))}",
            "--max-model-len 4096",
        ]

        if model_config.get("quantization"):
            cmd_parts.append(f"--quantization {quote(str(model_config['quantization']))}")

        return " \\\n".join(cmd_parts)

    def print_deployment_instructions(self):
        """Print step-by-step Vast.ai deployment instructions to stdout."""
        # Local import keeps the project-level config dependency out of
        # module import time; only needed when instructions are printed.
        from pipeline.common_defs import ModelConfig

        print("\n" + "=" * 80)
        print("QWEN MODEL DEPLOYMENT INSTRUCTIONS")
        print("=" * 80)

        print("\n1. RENT GPUS ON VAST.AI")
        print("-" * 80)
        print("\nFor Qwen 3 235B (Primary):")
        print("  - Select: 4 × A100 80GB PCIe")
        print("  - Image: pytorch/pytorch:latest")
        print(f"  - Cost: ${ModelConfig.QWEN3_235B['cost_per_hour']}/hr")

        print("\nFor Qwen 2.5 72B (Secondary):")
        print("  - Select: 2 × A100 80GB PCIe")
        print("  - Image: pytorch/pytorch:latest")
        print(f"  - Cost: ${ModelConfig.QWEN25_72B['cost_per_hour']}/hr")

        print("\n2. INSTALL DEPENDENCIES")
        print("-" * 80)
        print("pip install vllm transformers accelerate")

        print("\n3. DEPLOY QWEN 3 235B (Primary)")
        print("-" * 80)
        qwen3_cmd = self.generate_deployment_command(ModelConfig.QWEN3_235B)
        print(qwen3_cmd)

        print("\n4. DEPLOY QWEN 2.5 72B (Secondary)")
        print("-" * 80)
        qwen25_cmd = self.generate_deployment_command(ModelConfig.QWEN25_72B)
        print(qwen25_cmd)

        print("\n5. VERIFY DEPLOYMENT")
        print("-" * 80)
        print("curl http://localhost:8000/health  # Qwen 3")
        print("curl http://localhost:8001/health  # Qwen 2.5")
        print("\n" + "=" * 80)
if __name__ == "__main__":
    # Script entry point: build a deployer and emit the instructions.
    ModelDeployer().print_deployment_instructions()