| 12345678910111213141516171819202122232425 |
- """
- Compare and combine keyword identification methods.
- """
- import json
def combine_keywords(semantic_results: dict, llm_results: dict) -> dict:
    """Combine keywords from both methods.

    For every criterion listed in ``semantic_results["criteria"]`` the
    semantic keywords (the ``"word"`` field of each keyword entry) are merged
    with the LLM keywords stored under the same criterion key.

    Args:
        semantic_results: Mapping with a ``"criteria"`` dict; each value has a
            ``"keywords"`` list of dicts carrying a ``"word"`` entry.
        llm_results: Mapping with a ``"criteria"`` dict; each value has a
            ``"keywords"`` iterable of keywords. A criterion that is absent
            here contributes no LLM keywords instead of raising ``KeyError``.

    Returns:
        Dict mapping the criterion number (key converted with ``int``) to the
        sorted list of unique keywords from both methods.
    """
    llm_criteria = llm_results.get("criteria", {})
    combined = {}
    for criterion_key, semantic_entry in semantic_results["criteria"].items():
        criterion_num = int(criterion_key)
        semantic_kws = {kw["word"] for kw in semantic_entry["keywords"]}
        # The LLM method may not have produced anything for this criterion;
        # fall back to an empty set so the semantic keywords are still kept.
        llm_kws = set(llm_criteria.get(criterion_key, {}).get("keywords", ()))
        combined[criterion_num] = sorted(semantic_kws | llm_kws)
    return combined
def analyze_overlap(semantic_results: dict, llm_results: dict) -> None:
    """Analyze overlap between methods.

    Prints a header followed by one line per criterion in
    ``semantic_results["criteria"]``, giving the number of keywords found by
    both methods and the number of distinct keywords found by either.

    Args:
        semantic_results: Mapping with a ``"criteria"`` dict; each value has a
            ``"keywords"`` list of dicts carrying a ``"word"`` entry.
        llm_results: Mapping with a ``"criteria"`` dict; each value has a
            ``"keywords"`` iterable of keywords. A criterion that is absent
            here is reported with zero overlap instead of raising
            ``KeyError``.
    """
    print("\nKEYWORD METHOD COMPARISON")
    llm_criteria = llm_results.get("criteria", {})
    for criterion_key, semantic_entry in semantic_results["criteria"].items():
        criterion_num = int(criterion_key)
        semantic_kws = {kw["word"] for kw in semantic_entry["keywords"]}
        # A criterion missing from the LLM results is treated as having no
        # LLM keywords so the report still covers every semantic criterion.
        llm_kws = set(llm_criteria.get(criterion_key, {}).get("keywords", ()))
        overlap = semantic_kws & llm_kws
        total = semantic_kws | llm_kws
        print(f"Criterion {criterion_num}: {len(overlap)} overlap, {len(total)} total")
|