package com.aiteacher.retrieval;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Post-processes generated answers to strip citation labels that do not
 * correspond to any passage retrieved for the current query, preventing
 * hallucinated source references from reaching the user.
 */
@Service
public class CitationValidatorService {

    private static final Logger log = LoggerFactory.getLogger(CitationValidatorService.class);

    /** Matches citation labels of the form [S1], [F2], [S12], etc. */
    private static final Pattern CITATION_PATTERN = Pattern.compile("\\[(S|F)\\d+\\]");

    /**
     * Removes any {@code [Sx]} / {@code [Fx]} citation in {@code generatedAnswer}
     * whose label is not contained in {@code validLabels}.
     *
     * <p>Only the bracketed citation token itself is removed; surrounding
     * whitespace and punctuation are left exactly as they were. Citations whose
     * label is valid are kept verbatim. When at least one citation is removed,
     * the removed labels are logged at WARN level.
     *
     * @param generatedAnswer raw model output; {@code null} yields an empty string
     * @param validLabels     set of labels (without brackets, e.g. {@code "S1"})
     *                        present in the retrieved context; {@code null} is
     *                        treated as an empty set, so every citation is removed
     * @return cleaned answer text with hallucinated citations removed; never {@code null}
     */
    public String validate(String generatedAnswer, Set<String> validLabels) {
        if (generatedAnswer == null) {
            return "";
        }

        Matcher matcher = CITATION_PATTERN.matcher(generatedAnswer);
        List<String> removed = new ArrayList<>();
        StringBuilder cleaned = new StringBuilder(generatedAnswer.length());

        // Index in generatedAnswer up to which text has already been copied to
        // `cleaned`. It only advances past citations that are being dropped, so
        // valid citations are carried along with the surrounding text.
        int copiedUpTo = 0;

        while (matcher.find()) {
            String citation = matcher.group();
            // Strip the enclosing '[' and ']' to get the bare label, e.g. "S12".
            String label = citation.substring(1, citation.length() - 1);

            // A null label set means nothing was retrieved: fail closed and
            // treat every citation as hallucinated instead of throwing.
            boolean known = validLabels != null && validLabels.contains(label);
            if (known) {
                continue;
            }

            cleaned.append(generatedAnswer, copiedUpTo, matcher.start());
            copiedUpTo = matcher.end();
            removed.add(label);
        }
        cleaned.append(generatedAnswer, copiedUpTo, generatedAnswer.length());

        if (!removed.isEmpty()) {
            log.warn("Stripped hallucinated citations: {}", removed);
        }
        return cleaned.toString();
    }
}