"""
Deployment helper for Qwen models on Vast.ai
"""
import logging
import shlex
import time
from typing import Dict
class ModelDeployer:
    """Helper for deploying Qwen models on Vast.ai.

    Generates vLLM launch commands and prints step-by-step deployment
    instructions. Instruction output goes to stdout via ``print``; the
    logger is available for callers/subclasses that want structured logs.
    """

    def __init__(self):
        # Named (non-root) logger; guard below ensures repeated
        # instantiation does not attach duplicate handlers and
        # double-print every record.
        self.logger = logging.getLogger("ModelDeployer")
        self.logger.setLevel(logging.INFO)

        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

    def generate_deployment_command(self, model_config: Dict) -> str:
        """Build a multi-line vLLM OpenAI API server launch command.

        Args:
            model_config: Mapping with required keys ``name``, ``gpus`` and
                ``port``, and an optional ``quantization`` key (falsy values
                are treated as "no quantization").

        Returns:
            A shell command string joined with backslash-newline
            continuations, suitable for copy-pasting into a terminal.

        Raises:
            KeyError: If a required key is missing from ``model_config``.
        """
        # Quote each interpolated value so model names/values containing
        # shell metacharacters (spaces, quotes, ...) cannot break or
        # inject into the command. Plain tokens pass through unchanged.
        quote = shlex.quote
        cmd_parts = [
            "python -m vllm.entrypoints.openai.api_server",
            f"--model {quote(str(model_config['name']))}",
            f"--tensor-parallel-size {quote(str(model_config['gpus']))}",
            f"--port {quote(str(model_config['port']))}",
            "--max-model-len 4096",
        ]

        if model_config.get("quantization"):
            cmd_parts.append(f"--quantization {quote(str(model_config['quantization']))}")

        return " \\\n".join(cmd_parts)

    def print_deployment_instructions(self):
        """Print step-by-step Vast.ai deployment instructions to stdout."""
        # Local import keeps the project-level config dependency out of
        # module import time; only needed when instructions are printed.
        from pipeline.common_defs import ModelConfig

        print("\n" + "=" * 80)
        print("QWEN MODEL DEPLOYMENT INSTRUCTIONS")
        print("=" * 80)

        print("\n1. RENT GPUS ON VAST.AI")
        print("-" * 80)
        print("\nFor Qwen 3 235B (Primary):")
        print("  - Select: 4 × A100 80GB PCIe")
        print("  - Image: pytorch/pytorch:latest")
        print(f"  - Cost: ${ModelConfig.QWEN3_235B['cost_per_hour']}/hr")

        print("\nFor Qwen 2.5 72B (Secondary):")
        print("  - Select: 2 × A100 80GB PCIe")
        print("  - Image: pytorch/pytorch:latest")
        print(f"  - Cost: ${ModelConfig.QWEN25_72B['cost_per_hour']}/hr")

        print("\n2. INSTALL DEPENDENCIES")
        print("-" * 80)
        print("pip install vllm transformers accelerate")

        print("\n3. DEPLOY QWEN 3 235B (Primary)")
        print("-" * 80)
        qwen3_cmd = self.generate_deployment_command(ModelConfig.QWEN3_235B)
        print(qwen3_cmd)

        print("\n4. DEPLOY QWEN 2.5 72B (Secondary)")
        print("-" * 80)
        qwen25_cmd = self.generate_deployment_command(ModelConfig.QWEN25_72B)
        print(qwen25_cmd)

        print("\n5. VERIFY DEPLOYMENT")
        print("-" * 80)
        print("curl http://localhost:8000/health  # Qwen 3")
        print("curl http://localhost:8001/health  # Qwen 2.5")
        print("\n" + "=" * 80)
if __name__ == "__main__":
    # Script entry point: build a deployer and emit the instructions.
    ModelDeployer().print_deployment_instructions()