Files
ai-teacher/backend/src/main/java/com/aiteacher/retrieval/CitationValidatorService.java
T
2026-04-07 22:39:28 +02:00

60 lines
2.0 KiB
Java

package com.aiteacher.retrieval;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Post-processes generated answers to strip citation labels that do not
 * correspond to any passage retrieved for the current query, preventing
 * hallucinated source references from reaching the user.
 */
@Service
public class CitationValidatorService {

    private static final Logger log = LoggerFactory.getLogger(CitationValidatorService.class);

    /**
     * Matches citation labels of the form [S1], [F2], [S12], etc.
     * Group 1 captures the label without the surrounding brackets (e.g. {@code S12}).
     */
    private static final Pattern CITATION_PATTERN = Pattern.compile("\\[([SF]\\d+)\\]");

    /**
     * Removes any {@code [Sx]} / {@code [Fx]} citation in {@code generatedAnswer}
     * whose label is not contained in {@code validLabels}.
     *
     * <p>Only the bracketed label itself is removed; whitespace and punctuation
     * around a stripped citation are left exactly as they were. Every stripped
     * label is reported in a single WARN log entry.
     *
     * @param generatedAnswer raw model output; {@code null} yields an empty string
     * @param validLabels set of bracket-free labels (e.g. {@code "S1"}) present in the
     *                    retrieved context; {@code null} is treated as an empty set,
     *                    so every citation is stripped
     * @return cleaned answer text with hallucinated citations removed, never {@code null}
     */
    public String validate(String generatedAnswer, Set<String> validLabels) {
        if (generatedAnswer == null) {
            return "";
        }

        Matcher matcher = CITATION_PATTERN.matcher(generatedAnswer);
        List<String> removed = new ArrayList<>();
        StringBuilder cleaned = new StringBuilder(generatedAnswer.length());
        // Index of the first character of the input not yet copied to the output.
        int copiedUpTo = 0;

        while (matcher.find()) {
            String inner = matcher.group(1);
            if (validLabels != null && validLabels.contains(inner)) {
                // Valid citation: leave it in place; it is copied verbatim together
                // with the text that follows it.
                continue;
            }
            removed.add(inner);
            // Copy everything before the invalid citation, then skip over the citation.
            cleaned.append(generatedAnswer, copiedUpTo, matcher.start());
            copiedUpTo = matcher.end();
        }
        cleaned.append(generatedAnswer, copiedUpTo, generatedAnswer.length());

        if (!removed.isEmpty()) {
            log.warn("Stripped hallucinated citations: {}", removed);
        }
        return cleaned.toString();
    }
}