step6_labeling_template.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. """
  2. Step 6: Generate attorney labeling template.
  3. """
  4. from typing import List
  5. from pipeline.models.base import PipelineStep
  6. from pipeline.common_defs import Chunk, CASE_NAME, SUBPOENA_CRITERIA
  7. class LabelingTemplateGenerator(PipelineStep):
  8. """Generate attorney labeling template"""
  9. def __init__(self, output_dir: str = './pipeline_output'):
  10. super().__init__(output_dir)
  11. def execute(self, samples: List[Chunk]) -> str:
  12. """
  13. Generate attorney labeling template.
  14. Args:
  15. samples: List of sampled chunks
  16. Returns:
  17. Path to generated template file
  18. """
  19. self.logger.info(f"Generating labeling template for {len(samples)} samples...")
  20. template = self._create_template(samples)
  21. filepath = self.output_dir / 'attorney_labeling_template.txt'
  22. with open(filepath, 'w') as f:
  23. f.write(template)
  24. self.logger.info(f"Template saved to: {filepath}")
  25. return str(filepath)
  26. def _create_template(self, samples: List[Chunk]) -> str:
  27. """Create the template content"""
  28. lines = []
  29. # Header
  30. lines.append("ATTORNEY LABELING TEMPLATE")
  31. lines.append(CASE_NAME)
  32. lines.append("=" * 80)
  33. lines.append("")
  34. # Instructions
  35. lines.append("INSTRUCTIONS:")
  36. lines.append("For each message below, please provide:")
  37. lines.append("1. RESPONSIVE: YES or NO")
  38. lines.append("2. REASONING: Brief explanation of your decision")
  39. lines.append("3. CRITERIA: Which subpoena criteria matched (1-7):")
  40. lines.append("")
  41. for num, desc in SUBPOENA_CRITERIA.items():
  42. lines.append(f" {num}. {desc}")
  43. lines.append("")
  44. lines.append("=" * 80)
  45. lines.append("")
  46. # Samples
  47. for i, sample in enumerate(samples, 1):
  48. lines.extend(self._format_sample(i, sample))
  49. return "\n".join(lines)
  50. def _format_sample(self, sample_num: int, chunk: Chunk) -> List[str]:
  51. """Format a single sample"""
  52. lines = []
  53. lines.append(f"SAMPLE {sample_num}")
  54. lines.append("-" * 80)
  55. # First message (target for labeling)
  56. if chunk.messages:
  57. first_msg = chunk.messages[0]
  58. lines.append(f"Line: {first_msg.line_number}")
  59. lines.append(f"Time: {first_msg.timestamp}")
  60. lines.append(f"Sender: {first_msg.sender}")
  61. lines.append(f"Message: {first_msg.message}")
  62. lines.append("")
  63. # Context (surrounding messages)
  64. lines.append("Context (surrounding messages):")
  65. for j, msg in enumerate(chunk.messages[:5], 1):
  66. marker = ">>>" if j == 1 else " "
  67. msg_preview = msg.message[:80] + "..." if len(msg.message) > 80 else msg.message
  68. lines.append(f"{marker} [{msg.sender}]: {msg_preview}")
  69. lines.append("")
  70. # Response fields
  71. lines.append("RESPONSIVE: _______")
  72. lines.append("REASONING: _____________________________________________")
  73. lines.append("CRITERIA: _______")
  74. lines.append("")
  75. lines.append("=" * 80)
  76. lines.append("")
  77. return lines
  78. if __name__ == "__main__":
  79. # Example usage
  80. import json
  81. from pipeline.common_defs import Chunk, Message
  82. with open('pipeline_output/random_samples.json', 'r') as f:
  83. samples_data = json.load(f)
  84. # Reconstruct chunks (simplified)
  85. samples = []
  86. for item in samples_data:
  87. chunk = Chunk(
  88. chunk_id=item['chunk_id'],
  89. start_line=item['start_line'],
  90. end_line=item['end_line'],
  91. messages=[Message(1, "", "Sender", "Sample message", "")],
  92. combined_text="",
  93. timestamp_start="",
  94. timestamp_end=""
  95. )
  96. samples.append(chunk)
  97. generator = LabelingTemplateGenerator()
  98. template_path = generator.execute(samples)
  99. print(f"Template created: {template_path}")