enhance rag retrieval + summary
This commit is contained in:
@@ -0,0 +1,59 @@
|
||||
package com.aiteacher.retrieval;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Post-processes generated answers to strip citation labels that do not
|
||||
* correspond to any passage retrieved for the current query, preventing
|
||||
* hallucinated source references from reaching the user.
|
||||
*/
|
||||
@Service
|
||||
public class CitationValidatorService {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(CitationValidatorService.class);
|
||||
|
||||
/** Matches citation labels of the form [S1], [F2], [S12], etc. */
|
||||
private static final Pattern CITATION_PATTERN = Pattern.compile("\\[(S|F)\\d+\\]");
|
||||
|
||||
/**
|
||||
* Removes any {@code [Sx]} / {@code [Fx]} citation in {@code generatedAnswer}
|
||||
* whose label is not contained in {@code validLabels}.
|
||||
*
|
||||
* @param generatedAnswer raw model output
|
||||
* @param validLabels set of labels present in the retrieved context
|
||||
* @return cleaned answer text with hallucinated citations removed
|
||||
*/
|
||||
public String validate(String generatedAnswer, Set<String> validLabels) {
|
||||
if (generatedAnswer == null) return "";
|
||||
|
||||
Matcher matcher = CITATION_PATTERN.matcher(generatedAnswer);
|
||||
List<String> removed = new ArrayList<>();
|
||||
StringBuffer sb = new StringBuffer();
|
||||
|
||||
while (matcher.find()) {
|
||||
String label = matcher.group();
|
||||
String inner = label.substring(1, label.length() - 1); // strip [ ]
|
||||
if (validLabels.contains(inner)) {
|
||||
matcher.appendReplacement(sb, Matcher.quoteReplacement(label));
|
||||
} else {
|
||||
removed.add(inner);
|
||||
matcher.appendReplacement(sb, "");
|
||||
}
|
||||
}
|
||||
matcher.appendTail(sb);
|
||||
|
||||
if (!removed.isEmpty()) {
|
||||
log.warn("Stripped hallucinated citations: {}", removed);
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user