"""
Step 6: Generate attorney labeling template.
"""
- from typing import List
- from pipeline.models.base import PipelineStep
- from pipeline.common_defs import Chunk, CASE_NAME, SUBPOENA_CRITERIA
class LabelingTemplateGenerator(PipelineStep):
    """Generate the plain-text template attorneys fill in to label samples.

    The template consists of a header, labeling instructions, the list of
    subpoena criteria taken from ``SUBPOENA_CRITERIA`` and one section per
    sampled chunk.
    """

    def __init__(self, output_dir: str = './pipeline_output'):
        """Initialize the step.

        Args:
            output_dir: Directory handed to ``PipelineStep``; the template
                file is written beneath ``self.output_dir``.
        """
        super().__init__(output_dir)

    def execute(self, samples: List[Chunk]) -> str:
        """Generate the attorney labeling template and write it to disk.

        Args:
            samples: List of sampled chunks.

        Returns:
            Path to the generated template file, as a string.
        """
        self.logger.info(f"Generating labeling template for {len(samples)} samples...")

        template = self._create_template(samples)

        filepath = self.output_dir / 'attorney_labeling_template.txt'
        # Write as UTF-8 explicitly: message text may contain characters the
        # platform's default encoding cannot represent.
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(template)

        self.logger.info(f"Template saved to: {filepath}")

        return str(filepath)

    def _create_template(self, samples: List[Chunk]) -> str:
        """Build the full template text.

        Args:
            samples: List of sampled chunks, rendered in order and numbered
                starting at 1.

        Returns:
            The template as a single newline-joined string.
        """
        lines = [
            # Header
            "ATTORNEY LABELING TEMPLATE",
            CASE_NAME,
            "=" * 80,
            "",
            # Instructions
            "INSTRUCTIONS:",
            "For each message below, please provide:",
            "1. RESPONSIVE: YES or NO",
            "2. REASONING: Brief explanation of your decision",
            "3. CRITERIA: Which subpoena criteria matched (1-7):",
            "",
        ]

        lines.extend(
            f" {num}. {desc}" for num, desc in SUBPOENA_CRITERIA.items()
        )

        lines.extend(["", "=" * 80, ""])

        # Samples
        for i, sample in enumerate(samples, 1):
            lines.extend(self._format_sample(i, sample))

        return "\n".join(lines)

    def _format_sample(self, sample_num: int, chunk: Chunk) -> List[str]:
        """Format a single sample section.

        Args:
            sample_num: Number shown in the section heading.
            chunk: Chunk whose first message is the labeling target; up to
                the first five messages are listed as context.

        Returns:
            The lines of the section, without trailing newlines.
        """
        lines = [f"SAMPLE {sample_num}", "-" * 80]

        # First message (target for labeling)
        if chunk.messages:
            first_msg = chunk.messages[0]
            lines.append(f"Line: {first_msg.line_number}")
            lines.append(f"Time: {first_msg.timestamp}")
            lines.append(f"Sender: {first_msg.sender}")
            lines.append(f"Message: {first_msg.message}")
            lines.append("")

            # Context (surrounding messages); the target message is marked.
            lines.append("Context (surrounding messages):")
            for j, msg in enumerate(chunk.messages[:5], 1):
                marker = ">>>" if j == 1 else " "
                # Keep the context compact: long messages are cut to 80
                # characters and suffixed with an ellipsis.
                if len(msg.message) > 80:
                    msg_preview = msg.message[:80] + "..."
                else:
                    msg_preview = msg.message
                lines.append(f"{marker} [{msg.sender}]: {msg_preview}")
            lines.append("")

        # Response fields
        lines.append("RESPONSIVE: _______")
        lines.append("REASONING: _____________________________________________")
        lines.append("CRITERIA: _______")
        lines.append("")
        lines.append("=" * 80)
        lines.append("")

        return lines
if __name__ == "__main__":
    # Example usage: rebuild chunks from a previously saved sample file and
    # generate the labeling template for them.
    import json
    from pipeline.common_defs import Chunk, Message

    # JSON interchange files are UTF-8; do not depend on the platform default.
    with open('pipeline_output/random_samples.json', 'r', encoding='utf-8') as f:
        samples_data = json.load(f)

    # Reconstruct chunks (simplified): every chunk gets one placeholder
    # message rather than the messages it originally contained.
    samples = [
        Chunk(
            chunk_id=item['chunk_id'],
            start_line=item['start_line'],
            end_line=item['end_line'],
            messages=[Message(1, "", "Sender", "Sample message", "")],
            combined_text="",
            timestamp_start="",
            timestamp_end="",
        )
        for item in samples_data
    ]

    generator = LabelingTemplateGenerator()
    template_path = generator.execute(samples)
    print(f"Template created: {template_path}")
|