# combine_keywords.py

"""
Compare and combine keyword identification methods.
"""
  4. import json
  5. def combine_keywords(semantic_results, llm_results):
  6. """Combine keywords from both methods"""
  7. combined = {}
  8. for criterion_num_str in semantic_results["criteria"].keys():
  9. criterion_num = int(criterion_num_str)
  10. semantic_kws = set(kw["word"] for kw in semantic_results["criteria"][criterion_num_str]["keywords"])
  11. llm_kws = set(llm_results["criteria"][criterion_num_str]["keywords"])
  12. combined[criterion_num] = sorted(list(semantic_kws | llm_kws))
  13. return combined
  14. def analyze_overlap(semantic_results, llm_results):
  15. """Analyze overlap between methods"""
  16. print("\nKEYWORD METHOD COMPARISON")
  17. for criterion_num_str in semantic_results["criteria"].keys():
  18. criterion_num = int(criterion_num_str)
  19. semantic_kws = set(kw["word"] for kw in semantic_results["criteria"][criterion_num_str]["keywords"])
  20. llm_kws = set(llm_results["criteria"][criterion_num_str]["keywords"])
  21. overlap = semantic_kws & llm_kws
  22. print(f"Criterion {criterion_num}: {len(overlap)} overlap, {len(semantic_kws | llm_kws)} total")