combine_keywords.py 1.2 KB

1234567891011121314151617181920212223242526272829
  1. """
  2. Compare and combine keyword identification methods.
  3. """
  4. import json
  5. def combine_keywords(semantic_results, llm_results):
  6. """Combine keywords from both methods"""
  7. combined = {}
  8. for criterion_num_str in semantic_results["criteria"].keys():
  9. criterion_num = int(criterion_num_str)
  10. semantic_kws = set(kw["word"] for kw in semantic_results["criteria"][criterion_num_str]["keywords"])
  11. llm_kws = set(
  12. kw.lower() for kw in llm_results["criteria"][criterion_num_str]["keywords"]
  13. )
  14. combined[criterion_num] = sorted(list(semantic_kws | llm_kws))
  15. return combined
  16. def analyze_overlap(semantic_results, llm_results):
  17. """Analyze overlap between methods"""
  18. print("\nKEYWORD METHOD COMPARISON")
  19. for criterion_num_str in semantic_results["criteria"].keys():
  20. criterion_num = int(criterion_num_str)
  21. semantic_kws = set(kw["word"] for kw in semantic_results["criteria"][criterion_num_str]["keywords"])
  22. llm_kws = set(
  23. kw.lower() for kw in llm_results["criteria"][criterion_num_str]["keywords"]
  24. )
  25. overlap = semantic_kws & llm_kws
  26. print(f"Criterion {criterion_num}: {len(overlap)} overlap, {len(semantic_kws | llm_kws)} total")