""" Step 6: Generate attorney labeling template. """ from typing import List from pipeline.models.base import PipelineStep from pipeline.common_defs import Chunk, CASE_NAME, SUBPOENA_CRITERIA class LabelingTemplateGenerator(PipelineStep): """Generate attorney labeling template""" def __init__(self, output_dir: str = './pipeline_output'): super().__init__(output_dir) def execute(self, samples: List[Chunk]) -> str: """ Generate attorney labeling template. Args: samples: List of sampled chunks Returns: Path to generated template file """ self.logger.info(f"Generating labeling template for {len(samples)} samples...") template = self._create_template(samples) filepath = self.output_dir / 'attorney_labeling_template.txt' with open(filepath, 'w') as f: f.write(template) self.logger.info(f"Template saved to: {filepath}") return str(filepath) def _create_template(self, samples: List[Chunk]) -> str: """Create the template content""" lines = [] # Header lines.append("ATTORNEY LABELING TEMPLATE") lines.append(CASE_NAME) lines.append("=" * 80) lines.append("") # Instructions lines.append("INSTRUCTIONS:") lines.append("For each message below, please provide:") lines.append("1. RESPONSIVE: YES or NO") lines.append("2. REASONING: Brief explanation of your decision") lines.append("3. CRITERIA: Which subpoena criteria matched (1-7):") lines.append("") for num, desc in SUBPOENA_CRITERIA.items(): lines.append(f" {num}. {desc}") lines.append("") lines.append("=" * 80) lines.append("") # Samples for i, sample in enumerate(samples, 1): lines.extend(self._format_sample(i, sample)) return "\n".join(lines) def _format_sample(self, sample_num: int, chunk: Chunk) -> List[str]: """Format a single sample""" lines = [] lines.append(f"SAMPLE {sample_num}") lines.append("-" * 80) # First message (target for labeling) if chunk.messages: first_msg = chunk.messages[0] lines.append(f"Line: {first_msg.line_number}") lines.append(f"Time: {first_msg.timestamp}") lines.append(f"Sender: {first_msg.sender}") lines.append(f"Message: {first_msg.message}") lines.append("") # Context (surrounding messages) lines.append("Context (surrounding messages):") for j, msg in enumerate(chunk.messages[:5], 1): marker = ">>>" if j == 1 else " " msg_preview = msg.message[:80] + "..." if len(msg.message) > 80 else msg.message lines.append(f"{marker} [{msg.sender}]: {msg_preview}") lines.append("") # Response fields lines.append("RESPONSIVE: _______") lines.append("REASONING: _____________________________________________") lines.append("CRITERIA: _______") lines.append("") lines.append("=" * 80) lines.append("") return lines if __name__ == "__main__": # Example usage import json from pipeline.common_defs import Chunk, Message with open('pipeline_output/random_samples.json', 'r') as f: samples_data = json.load(f) # Reconstruct chunks (simplified) samples = [] for item in samples_data: chunk = Chunk( chunk_id=item['chunk_id'], start_line=item['start_line'], end_line=item['end_line'], messages=[Message(1, "", "Sender", "Sample message", "")], combined_text="", timestamp_start="", timestamp_end="" ) samples.append(chunk) generator = LabelingTemplateGenerator() template_path = generator.execute(samples) print(f"Template created: {template_path}")