# deployment_helper.py
  1. """
  2. Deployment helper for Qwen models on Vast.ai
  3. """
  4. import time
  5. from typing import Dict
  6. import logging
  7. class ModelDeployer:
  8. """Helper class for deploying Qwen models"""
  9. def __init__(self):
  10. self.logger = logging.getLogger("ModelDeployer")
  11. self.logger.setLevel(logging.INFO)
  12. if not self.logger.handlers:
  13. handler = logging.StreamHandler()
  14. formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
  15. handler.setFormatter(formatter)
  16. self.logger.addHandler(handler)
  17. def generate_deployment_command(self, model_config: Dict) -> str:
  18. """Generate vLLM deployment command"""
  19. cmd_parts = [
  20. "python -m vllm.entrypoints.openai.api_server",
  21. f"--model {model_config['name']}",
  22. f"--tensor-parallel-size {model_config['gpus']}",
  23. f"--port {model_config['port']}",
  24. "--max-model-len 4096"
  25. ]
  26. if model_config.get("quantization"):
  27. cmd_parts.append(f"--quantization {model_config['quantization']}")
  28. return " \\\n".join(cmd_parts)
  29. def print_deployment_instructions(self):
  30. """Print deployment instructions"""
  31. from pipeline.common_defs import ModelConfig
  32. print("\n" + "=" * 80)
  33. print("QWEN MODEL DEPLOYMENT INSTRUCTIONS")
  34. print("=" * 80)
  35. print("\n1. RENT GPUS ON VAST.AI")
  36. print("-" * 80)
  37. print("\nFor Qwen 3 235B (Primary):")
  38. print(" - Select: 4 × A100 80GB PCIe")
  39. print(" - Image: pytorch/pytorch:latest")
  40. print(f" - Cost: ${ModelConfig.QWEN3_235B['cost_per_hour']}/hr")
  41. print("\nFor Qwen 2.5 72B (Secondary):")
  42. print(" - Select: 2 × A100 80GB PCIe")
  43. print(" - Image: pytorch/pytorch:latest")
  44. print(f" - Cost: ${ModelConfig.QWEN25_72B['cost_per_hour']}/hr")
  45. print("\n2. INSTALL DEPENDENCIES")
  46. print("-" * 80)
  47. print("pip install vllm transformers accelerate")
  48. print("\n3. DEPLOY QWEN 3 235B (Primary)")
  49. print("-" * 80)
  50. qwen3_cmd = self.generate_deployment_command(ModelConfig.QWEN3_235B)
  51. print(qwen3_cmd)
  52. print("\n4. DEPLOY QWEN 2.5 72B (Secondary)")
  53. print("-" * 80)
  54. qwen25_cmd = self.generate_deployment_command(ModelConfig.QWEN25_72B)
  55. print(qwen25_cmd)
  56. print("\n5. VERIFY DEPLOYMENT")
  57. print("-" * 80)
  58. print("curl http://localhost:8000/health # Qwen 3")
  59. print("curl http://localhost:8001/health # Qwen 2.5")
  60. print("\n" + "=" * 80)
  61. if __name__ == "__main__":
  62. deployer = ModelDeployer()
  63. deployer.print_deployment_instructions()