""" Compare and combine keyword identification methods. """ import json def combine_keywords(semantic_results, llm_results): """Combine keywords from both methods""" combined = {} for criterion_num_str in semantic_results["criteria"].keys(): criterion_num = int(criterion_num_str) semantic_kws = set(kw["word"] for kw in semantic_results["criteria"][criterion_num_str]["keywords"]) llm_kws = set(llm_results["criteria"][criterion_num_str]["keywords"]) combined[criterion_num] = sorted(list(semantic_kws | llm_kws)) return combined def analyze_overlap(semantic_results, llm_results): """Analyze overlap between methods""" print("\nKEYWORD METHOD COMPARISON") for criterion_num_str in semantic_results["criteria"].keys(): criterion_num = int(criterion_num_str) semantic_kws = set(kw["word"] for kw in semantic_results["criteria"][criterion_num_str]["keywords"]) llm_kws = set(llm_results["criteria"][criterion_num_str]["keywords"]) overlap = semantic_kws & llm_kws print(f"Criterion {criterion_num}: {len(overlap)} overlap, {len(semantic_kws | llm_kws)} total")