# deployment_helper.py
  1. """
  2. Deployment helper for Qwen models on Vast.ai
  3. """
  4. import time
  5. from typing import Dict
  6. import logging
  7. class ModelDeployer:
  8. """Helper class for deploying Qwen models"""
  9. def __init__(self):
  10. self.logger = logging.getLogger("ModelDeployer")
  11. self.logger.setLevel(logging.INFO)
  12. if not self.logger.handlers:
  13. handler = logging.StreamHandler()
  14. formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
  15. handler.setFormatter(formatter)
  16. self.logger.addHandler(handler)
  17. def generate_deployment_command(self, model_config: Dict) -> str:
  18. """Generate vLLM deployment command"""
  19. cmd_parts = [
  20. "python -m vllm.entrypoints.openai.api_server",
  21. f"--model {model_config['name']}",
  22. f"--tensor-parallel-size {model_config['gpus']}",
  23. f"--port {model_config['port']}",
  24. "--max-model-len 4096"
  25. ]
  26. if model_config.get("quantization"):
  27. cmd_parts.append(f"--quantization {model_config['quantization']}")
  28. return " \\\n".join(cmd_parts)
  29. def print_deployment_instructions(self):
  30. """Print deployment instructions"""
  31. from pipeline.common_defs import ModelConfig
  32. print("\n" + "=" * 80)
  33. print("QWEN MODEL DEPLOYMENT INSTRUCTIONS")
  34. print("=" * 80)
  35. print("\n1. RENT GPUS ON VAST.AI")
  36. print("-" * 80)
  37. print("\nFor Qwen 3 235B (Primary):")
  38. print(" - Select: 4 × A100 80GB PCIe")
  39. print(" - Image: pytorch/pytorch:latest")
  40. print(f" - Cost: ${ModelConfig.QWEN3_235B['cost_per_hour']}/hr")
  41. print("\nFor Qwen 2.5 72B (Secondary):")
  42. print(" - Select: 2 × A100 80GB PCIe")
  43. print(" - Image: pytorch/pytorch:latest")
  44. print(f" - Cost: ${ModelConfig.QWEN25_72B['cost_per_hour']}/hr")
  45. print("\n2. INSTALL DEPENDENCIES")
  46. print("-" * 80)
  47. print("pip install vllm transformers accelerate")
  48. print("\n3. DEPLOY QWEN 3 235B (Primary)")
  49. print("-" * 80)
  50. qwen3_cmd = self.generate_deployment_command(ModelConfig.QWEN3_235B)
  51. print(qwen3_cmd)
  52. print("\n4. DEPLOY QWEN 2.5 72B (Secondary)")
  53. print("-" * 80)
  54. qwen25_cmd = self.generate_deployment_command(ModelConfig.QWEN25_72B)
  55. print(qwen25_cmd)
  56. print("\n5. VERIFY DEPLOYMENT")
  57. print("-" * 80)
  58. print("curl http://localhost:8000/health # Qwen 3")
  59. print("curl http://localhost:8001/health # Qwen 2.5")
  60. print("\n" + "=" * 80)
  61. if __name__ == "__main__":
  62. deployer = ModelDeployer()
  63. deployer.print_deployment_instructions()