deployment_helper.py 2.7 KB

  1. """
  2. Deployment helper for Qwen models on Vast.ai
  3. """
  4. import time
  5. from typing import Dict
  6. import logging
  7. class ModelDeployer:
  8. """Helper class for deploying Qwen models"""
  9. def __init__(self):
  10. self.logger = logging.getLogger("ModelDeployer")
  11. self.logger.setLevel(logging.INFO)
  12. if not self.logger.handlers:
  13. handler = logging.StreamHandler()
  14. formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
  15. handler.setFormatter(formatter)
  16. self.logger.addHandler(handler)
  17. def generate_deployment_command(self, model_config: Dict) -> str:
  18. """Generate vLLM deployment command"""
  19. cmd_parts = [
  20. "python -m vllm.entrypoints.openai.api_server",
  21. f"--model {model_config['name']}",
  22. f"--tensor-parallel-size {model_config['gpus']}",
  23. f"--port {model_config['port']}",
  24. "--max-model-len 4096"
  25. ]
  26. if model_config.get("quantization"):
  27. cmd_parts.append(f"--quantization {model_config['quantization']}")
  28. return " \\
  29. ".join(cmd_parts)
  30. def print_deployment_instructions(self):
  31. """Print deployment instructions"""
  32. from pipeline.common_defs import ModelConfig
  33. print("\n" + "=" * 80)
  34. print("QWEN MODEL DEPLOYMENT INSTRUCTIONS")
  35. print("=" * 80)
  36. print("\n1. RENT GPUS ON VAST.AI")
  37. print("-" * 80)
  38. print("\nFor Qwen 3 235B (Primary):")
  39. print(" - Select: 4 × A100 80GB PCIe")
  40. print(" - Image: pytorch/pytorch:latest")
  41. print(f" - Cost: ${ModelConfig.QWEN3_235B['cost_per_hour']}/hr")
  42. print("\nFor Qwen 2.5 72B (Secondary):")
  43. print(" - Select: 2 × A100 80GB PCIe")
  44. print(" - Image: pytorch/pytorch:latest")
  45. print(f" - Cost: ${ModelConfig.QWEN25_72B['cost_per_hour']}/hr")
  46. print("\n2. INSTALL DEPENDENCIES")
  47. print("-" * 80)
  48. print("pip install vllm transformers accelerate")
  49. print("\n3. DEPLOY QWEN 3 235B (Primary)")
  50. print("-" * 80)
  51. qwen3_cmd = self.generate_deployment_command(ModelConfig.QWEN3_235B)
  52. print(qwen3_cmd)
  53. print("\n4. DEPLOY QWEN 2.5 72B (Secondary)")
  54. print("-" * 80)
  55. qwen25_cmd = self.generate_deployment_command(ModelConfig.QWEN25_72B)
  56. print(qwen25_cmd)
  57. print("\n5. VERIFY DEPLOYMENT")
  58. print("-" * 80)
  59. print("curl http://localhost:8000/health # Qwen 3")
  60. print("curl http://localhost:8001/health # Qwen 2.5")
  61. print("\n" + "=" * 80)
  62. if __name__ == "__main__":
  63. deployer = ModelDeployer()
  64. deployer.print_deployment_instructions()