60 lines
2.0 KiB
Java
60 lines
2.0 KiB
Java
package com.aiteacher.retrieval;
|
|
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.stereotype.Service;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.Set;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
/**
|
|
* Post-processes generated answers to strip citation labels that do not
|
|
* correspond to any passage retrieved for the current query, preventing
|
|
* hallucinated source references from reaching the user.
|
|
*/
|
|
@Service
|
|
public class CitationValidatorService {
|
|
|
|
private static final Logger log = LoggerFactory.getLogger(CitationValidatorService.class);
|
|
|
|
/** Matches citation labels of the form [S1], [F2], [S12], etc. */
|
|
private static final Pattern CITATION_PATTERN = Pattern.compile("\\[(S|F)\\d+\\]");
|
|
|
|
/**
|
|
* Removes any {@code [Sx]} / {@code [Fx]} citation in {@code generatedAnswer}
|
|
* whose label is not contained in {@code validLabels}.
|
|
*
|
|
* @param generatedAnswer raw model output
|
|
* @param validLabels set of labels present in the retrieved context
|
|
* @return cleaned answer text with hallucinated citations removed
|
|
*/
|
|
public String validate(String generatedAnswer, Set<String> validLabels) {
|
|
if (generatedAnswer == null) return "";
|
|
|
|
Matcher matcher = CITATION_PATTERN.matcher(generatedAnswer);
|
|
List<String> removed = new ArrayList<>();
|
|
StringBuffer sb = new StringBuffer();
|
|
|
|
while (matcher.find()) {
|
|
String label = matcher.group();
|
|
String inner = label.substring(1, label.length() - 1); // strip [ ]
|
|
if (validLabels.contains(inner)) {
|
|
matcher.appendReplacement(sb, Matcher.quoteReplacement(label));
|
|
} else {
|
|
removed.add(inner);
|
|
matcher.appendReplacement(sb, "");
|
|
}
|
|
}
|
|
matcher.appendTail(sb);
|
|
|
|
if (!removed.isEmpty()) {
|
|
log.warn("Stripped hallucinated citations: {}", removed);
|
|
}
|
|
|
|
return sb.toString();
|
|
}
|
|
}
|