"""
Common definitions and constants for the legal discovery pipeline.
"""
from dataclasses import dataclass
from enum import Enum
from typing import List, Dict, Optional
# Case-specific criteria.
# CASE_NAME is the full caption of the matter; PLAINTIFF_NAME is the
# plaintiff's name exactly as it appears at the start of that caption.
CASE_NAME = "Jennifer Capasso v. Memorial Sloan Kettering Cancer Center"
PLAINTIFF_NAME = "Jennifer Capasso"
# Plaintiff name variations.
# Every entry is lowercase: full name, nicknames, surname only, and initials.
# NOTE(review): presumably compared against lowercased message text; very
# short entries such as "jen" or "jdc" may match unrelated words if the
# caller uses substring matching -- confirm against the matching code.
PLAINTIFF_VARIATIONS = [
    "jennifer capasso",
    "jen capasso",
    "jennifer",
    "jen",
    "jenn",
    "jenn capasso",
    "jennifer danielle capasso",
    "capasso",
    "j capasso",
    "jdc",
]
# Facility names.
# Lowercase spellings and abbreviations of Memorial Sloan Kettering.
# NOTE(review): "sk" is only two characters; verify the caller matches on
# word boundaries rather than raw substrings.
FACILITY_NAMES = [
    "memorial sloan kettering",
    "msk",
    "sloan kettering",
    "mskcc",
    "sk",
]
# Key topics for keyword filtering.
# Lowercase keywords and phrases, grouped by the subject they relate to.
# Every entry must end with a comma: Python silently concatenates adjacent
# string literals, so a missing comma fuses two keywords into one entry.
KEY_TOPICS = [
    # Treatment at MSK
    "treatment",
    "medical care",
    "doctor",
    "physician",
    "nurse",
    "appointment",
    "visit",
    "hospital",
    "clinic",
    "surgery",
    "procedure",
    "diagnosis",
    "medication",
    "prescription",
    # Complaints
    "complaint",
    "complain",
    "complained",
    "issue",
    "problem",
    "concern",
    "patient representative",
    "patient advocate",
    # Patient information updates
    "patient information",
    "medical records",
    "pronouns",
    "gender identity",
    "gender marker",
    "update records",
    # Discrimination
    "discrimination",
    "discriminate",
    "discriminated",
    "bias",
    "unfair",
    "mistreat",
    "transphobia",
    "misgendered",
    "deadname",
    "wrong pronouns",
    "refused",
    "denied",
    # March 7, 2022 surgery
    "march 7",
    "march 2022",
    "3/7/22",
    "3/7/2022",
    "lung surgery",
    "wedge resection",
    # Emotional distress
    "emotional distress",
    "mental anguish",
    "pain",
    "suffering",
    "trauma",
    "anxious",
    "depressed",
    "stress",
]
# Text normalization expansions.
# Maps a lowercase abbreviation to the longer form it stands for. Insertion
# order is kept exactly as originally written because a caller that applies
# the entries sequentially would depend on it.
# NOTE(review): several keys are prefixes or substrings of others
# ("med"/"meds", "pt"/"pts", "sk"/"msk") and "dr " carries a trailing space;
# how these interact depends on the replacement routine, which is not
# defined here -- confirm it matches whole tokens.
TEXT_EXPANSIONS = {
    "admin": "administrator",
    "appt": "appointment",
    "dept": "department",
    "dr.": "doctor",
    "dr ": "doctor ",
    "info": "information",
    "meds": "medication",
    "msk": "memorial sloan kettering",
    "mskcc": "memorial sloan kettering",
    "proc": "procedure",
    "pt": "patient",
    "pts": "patients",
    "rep": "representative",
    "rx": "prescription",
    "sk": "memorial sloan kettering",
    "med": "medical",
}
# Subpoena criteria descriptions.
# Keys are the criterion numbers 1-7; values are the full-sentence
# description of each criterion.
SUBPOENA_CRITERIA = {
    1: "Medical treatment, care, procedures, appointments, services, and healthcare experiences at Memorial Sloan Kettering Cancer Center (MSK) involving patient Jennifer Capasso.",
    2: "Complaints, grievances, concerns, feedback, disputes, or responses regarding patient care, service quality, or treatment issues raised with MSK staff, personnel, administrators, patient representatives, advocates, or employees concerning Jennifer Capasso.",
    3: "Patient information updates, record changes, profile modifications, requests to change pronouns, gender identity markers, gender designation, preferred name, or demographic information in medical records for Jennifer Capasso at MSK.",
    4: "Gender markers, gender identity documentation, sex designation, pronouns, or gender-related patient identifiers used in medical records, files, or systems at hospitals, medical facilities, or healthcare institutions where Jennifer Capasso received care or treatment.",
    5: "Discrimination, bias, prejudice, mistreatment, harassment, disparate treatment, or negative experiences based on gender identity, transgender status, or gender expression that Jennifer Capasso encountered in any context, setting, location, or institution.",
    6: "Surgery, surgical procedure, operation, medical intervention, or treatment performed on March 7, 2022 at Memorial Sloan Kettering Cancer Center involving Jennifer Capasso.",
    7: "Emotional distress, psychological harm, mental anguish, mental suffering, anxiety, depression, trauma, pain and suffering, physical harm, economic damages, financial losses, medical expenses, lost wages, or other compensable harm resulting from or related to Jennifer Capasso's care, treatment, or experiences at MSK.",
}

# Query texts for semantic filtering.
# Materialized as a list (in criterion order) rather than left as a
# dict_values view, so it can be indexed, sliced, and pickled.
SEMANTIC_QUERIES = list(SUBPOENA_CRITERIA.values())
# Model configurations
class ModelConfig:
    """Configuration for LLM models.

    Each class attribute is a plain dict with the same five keys:
    ``name``, ``gpus``, ``cost_per_hour``, ``port`` and ``quantization``.
    The two configurations use different ports (8000 and 8001).
    """

    # NOTE(review): "name" looks like a model-hub identifier and
    # "cost_per_hour" has no currency stated here -- confirm both with
    # the code that consumes these dicts.
    QWEN3_235B = {
        "name": "Qwen/Qwen3-235B-Instruct",
        "gpus": 4,
        "cost_per_hour": 2.56,
        "port": 8000,
        "quantization": "awq",
    }

    # No quantization is configured for this model (value is None).
    QWEN25_72B = {
        "name": "Qwen/Qwen2.5-72B-Instruct",
        "gpus": 2,
        "cost_per_hour": 1.28,
        "port": 8001,
        "quantization": None,
    }
# Confidence levels
class ConfidenceLevel(Enum):
    """Three-step confidence rating; each member's value is its lowercase name."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
@dataclass
class Message:
    """Represents a single message.

    Attributes:
        line_number: Integer line number associated with the message
            (presumably its line in the source export -- confirm).
        timestamp: Timestamp kept as a string; its format is not defined
            here.
        sender: Sender of the message.
        message: Message text.
        message_normalized: Normalized form of ``message``. Defaults to
            the empty string, so it can be filled in after construction.
    """

    line_number: int
    timestamp: str
    sender: str
    message: str
    message_normalized: str = ""
@dataclass
class Chunk:
    """Represents a chunk of messages.

    Attributes:
        chunk_id: Integer identifier of the chunk.
        start_line: First line number covered by the chunk.
        end_line: Last line number covered by the chunk (presumably
            inclusive -- confirm against the chunking code).
        messages: The ``Message`` objects that make up the chunk.
        combined_text: Text of the chunk as a single string.
        timestamp_start: Timestamp string for the start of the chunk.
        timestamp_end: Timestamp string for the end of the chunk.
        keyword_matches: Matched keywords, or None until set.
        keyword_score: Integer keyword score, or None until set.
        semantic_score_model1: Float score from the first model, or None
            until set.
        semantic_score_model2: Float score from the second model, or None
            until set.
        semantic_score_combined: Combined float score, or None until set
            (how the two scores are combined is not defined here).
    """

    chunk_id: int
    start_line: int
    end_line: int
    messages: List[Message]
    combined_text: str
    timestamp_start: str
    timestamp_end: str
    # Scoring fields all default to None so a chunk can be built before
    # any filtering stage has run.
    keyword_matches: Optional[List[str]] = None
    keyword_score: Optional[int] = None
    semantic_score_model1: Optional[float] = None
    semantic_score_model2: Optional[float] = None
    semantic_score_combined: Optional[float] = None
@dataclass
class InferenceResult:
    """Results from LLM inference.

    Attributes:
        chunk_id: Identifier of the chunk the result belongs to.
        responsive_line_numbers: Line numbers reported as responsive.
        reasoning: Free-text reasoning string.
        confidence: A ``ConfidenceLevel`` member.
        model_name: Name of the model that produced the result.
    """

    chunk_id: int
    responsive_line_numbers: List[int]
    reasoning: str
    confidence: ConfidenceLevel
    model_name: str
@dataclass
class MergedResult:
    """Merged results from dual models.

    Attributes:
        chunk_id: Identifier of the chunk the result belongs to.
        responsive_line_numbers: Line numbers in the merged result (the
            merge rule is not defined here).
        confidence: A ``ConfidenceLevel`` member for the merged result.
        qwen3_lines: Line numbers attributed to the Qwen3 model.
        qwen25_lines: Line numbers attributed to the Qwen2.5 model.
        agreement: Boolean agreement flag (presumably True when both
            models reported the same lines -- confirm with the merge
            code).
    """

    chunk_id: int
    responsive_line_numbers: List[int]
    confidence: ConfidenceLevel
    qwen3_lines: List[int]
    qwen25_lines: List[int]
    agreement: bool
|