@inproceedings{d9d27e321f1648e4ae32d195e720a6bf,
  title     = {{LLM}-Based Medical Document Evaluation: Integrating Human Expert Insights},
  abstract  = {Large Language Models (LLMs) show potential in medical document generation, but ensuring reliability requires extensive expert involvement, limiting clinical applications. To address this challenge, we developed an LLM-based evaluation framework with three progressive Chain of Thought (CoT) strategies: Qualitative (expert persona), Quantitative-qualitative (error analysis), and Insight-integrated (expert reasoning). This framework captures nuanced evaluation patterns while maintaining efficiency. When tested on 33 LLM-generated Emergency Department records across five criteria, our Insight-integrated approach demonstrated strong correlation with expert evaluations (r=0.680, p < .001), outperforming both Qualitative (r=0.524) and Quantitative-qualitative (r=0.630) approaches. Our findings suggest that LLM-based evaluation frameworks can align with expert assessments as useful tools for validating medical documentation in clinical settings.},
  keywords  = {clinical validation, evaluation framework, expert assessment, large language models, medical document evaluation, prompt engineering},
  author    = {Seo, Junhyuk and Choi, Dasol and Cha, Wonchul and Kim, Taerim},
  note      = {Publisher Copyright: {\textcopyright} 2025 The Authors.; 20th World Congress on Medical and Health Informatics, MEDINFO 2025 ; Conference date: 09-08-2025 Through 13-08-2025},
  year      = {2025},
  month     = aug,
  day       = {7},
  doi       = {10.3233/SHTI250995},
  language  = {English},
  series    = {Studies in Health Technology and Informatics},
  publisher = {IOS Press BV},
  pages     = {1029--1033},
  editor    = {Househ, {Mowafa S.} and Tariq, {Zain Ul Abideen} and Mahmood Al-Zubaidi and Uzair Shah and Elaine Huesing},
  booktitle = {MEDINFO 2025 - Healthcare Smart x Medicine Deep},
}