enhance rag retrieval + summary

This commit is contained in:
Adrien
2026-04-07 22:39:28 +02:00
parent 0cf318f0a7
commit aee6a9dfba
34 changed files with 2306 additions and 279 deletions
@@ -1,21 +1,25 @@
package com.aiteacher.topic;
import com.aiteacher.book.Book;
import com.aiteacher.book.BookRepository;
import com.aiteacher.book.BookStatus;
import com.aiteacher.book.NoKnowledgeSourceException;
import com.aiteacher.document.FigureEntity;
import com.aiteacher.document.SectionEntity;
import com.aiteacher.retrieval.NeurosurgeryRetriever;
import com.aiteacher.retrieval.RetrievalResult;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.client.advisor.vectorstore.QuestionAnswerAdvisor;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.stereotype.Service;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.UUID;
@Service
public class TopicSummaryService {
@@ -29,80 +33,190 @@ public class TopicSummaryService {
When answering:
- Structure your response clearly with key points
- If the context mentions specific book titles and page numbers, reference them
- Cite claims using ONLY the reference labels provided in the context (e.g. [S1], [F2]).
Do not invent page numbers, section titles, or labels not present in the CONTEXT block.
- If the retrieved context does not contain sufficient information on the topic,
explicitly state: "The uploaded books do not contain sufficient information on this topic."
- Never hallucinate or fabricate clinical information
""";
private final ChatClient chatClient;
private final VectorStore vectorStore;
private final BookRepository bookRepository;
private final NeurosurgeryRetriever retriever;
private final TopicSummaryRepository summaryRepository;
private final ObjectMapper objectMapper;
/**
 * Creates the service and stores each collaborator in the matching field.
 *
 * NOTE(review): two parameter lists appear back to back below, and {@code vectorStore} is
 * assigned although only the first list declares it. This looks like two revisions of the
 * constructor interleaved; confirm which signature is current.
 */
public TopicSummaryService(ChatClient chatClient, VectorStore vectorStore,
BookRepository bookRepository) {
public TopicSummaryService(ChatClient chatClient,
BookRepository bookRepository,
NeurosurgeryRetriever retriever,
TopicSummaryRepository summaryRepository,
ObjectMapper objectMapper) {
this.chatClient = chatClient;
this.vectorStore = vectorStore;
this.bookRepository = bookRepository;
this.retriever = retriever;
this.summaryRepository = summaryRepository;
this.objectMapper = objectMapper;
}
/**
 * Generates a summary for the given topic, persists it, and returns it with its sources.
 *
 * NOTE(review): this span appears to interleave statements from two revisions of the method
 * (two availability checks, two declarations of {@code summary} and {@code sources}, and two
 * candidates for the last constructor argument). The comments below describe individual
 * statements only; confirm which statements are current before relying on them.
 *
 * @param topic topic whose name and description drive the question and whose id keys the stored summary
 * @return the generated summary together with its source references
 * @throws NoKnowledgeSourceException when no book with status READY is available
 */
public TopicSummaryResponse generateSummary(Topic topic) {
if (!bookRepository.existsByStatus(BookStatus.READY)) {
// Loads every book and keeps only those whose status is READY.
List<Book> readyBooks = bookRepository.findAll().stream()
.filter(b -> b.getStatus() == BookStatus.READY)
.toList();
if (readyBooks.isEmpty()) {
throw new NoKnowledgeSourceException(
"No books are available as knowledge sources. Please upload and process at least one book.");
}
String question = buildQuestion(topic);
ChatResponse response = chatClient.prompt()
.system(SYSTEM_PROMPT)
.advisors(QuestionAnswerAdvisor.builder(vectorStore).build())
.user(question)
.call()
.chatResponse();
// Retrieval runs once per ready book; parent sections and figures from all books are pooled.
List<SectionEntity> allSections = new ArrayList<>();
List<FigureEntity> allFigures = new ArrayList<>();
for (Book book : readyBooks) {
RetrievalResult result = retriever.retrieve(question, book.getId());
allSections.addAll(result.parentSections());
allFigures.addAll(result.figures());
}
String summary = response.getResult().getOutput().getText();
List<TopicSummaryResponse.SourceReference> sources = extractSources(response);
log.debug("Topic summary for '{}': {} sections, {} figures retrieved",
topic.getName(), allSections.size(), allFigures.size());
// The pooled sections and figures are rendered into the user message itself.
String contextPrompt = buildContextPrompt(question, allSections, allFigures);
String summary = chatClient.prompt()
.system(SYSTEM_PROMPT)
.user(contextPrompt)
.call()
.content();
List<TopicSummaryResponse.SourceReference> sources = buildSources(allSections, allFigures, readyBooks);
// One timestamp is captured and handed to the stored entity.
Instant generatedAt = Instant.now();
// Next number = current count of summaries for this topic, narrowed to int, plus one.
// NOTE(review): nothing here makes count-then-save atomic; confirm that concurrent requests
// for the same topic cannot end up with the same summary number.
int summaryNumber = (int) summaryRepository.countByTopicId(topic.getId()) + 1;
String sourcesJson = serializeSources(sources);
TopicSummaryEntity entity = new TopicSummaryEntity(
topic.getId(), summaryNumber, summary, sourcesJson, generatedAt);
// Reassigned to the instance returned by save so that getId() below reads from the saved entity.
entity = summaryRepository.save(entity);
return new TopicSummaryResponse(
entity.getId(),
summaryNumber,
topic.getId(),
topic.getName(),
summary,
sources,
Instant.now()
generatedAt
);
}
/**
 * Lists the saved summaries of a topic as lightweight items.
 *
 * <p>Rows are fetched through {@code findByTopicIdOrderBySummaryNumberAsc}, and each one is
 * reduced to its id, summary number and generation timestamp.
 *
 * @param topicId identifier of the topic whose summaries are listed
 * @return one {@link SavedSummaryItem} per stored summary, in the order the repository returns them
 */
public List<SavedSummaryItem> listSummaries(String topicId) {
    var storedSummaries = summaryRepository.findByTopicIdOrderBySummaryNumberAsc(topicId);
    return storedSummaries.stream()
            .map(stored -> new SavedSummaryItem(
                    stored.getId(),
                    stored.getSummaryNumber(),
                    stored.getGeneratedAt()))
            .toList();
}
/**
 * Loads a previously persisted summary and rebuilds its response form.
 *
 * @param summaryId primary key of the stored summary
 * @return the stored summary, with its sources decoded from the persisted JSON
 * @throws NoSuchElementException if no summary exists for {@code summaryId}
 */
public TopicSummaryResponse getSummary(UUID summaryId) {
    var lookup = summaryRepository.findById(summaryId);
    if (lookup.isEmpty()) {
        throw new NoSuchElementException("Summary not found.");
    }
    TopicSummaryEntity stored = lookup.get();

    List<TopicSummaryResponse.SourceReference> decodedSources =
            deserializeSources(stored.getSourcesJson());

    // NOTE(review): the topic id fills two consecutive arguments here, whereas generateSummary
    // passes topic.getId() followed by topic.getName() in the same positions. Confirm whether
    // the topic name should be resolved for stored summaries as well.
    return new TopicSummaryResponse(
            stored.getId(),
            stored.getSummaryNumber(),
            stored.getTopicId(),
            stored.getTopicId(),
            stored.getSummary(),
            decodedSources,
            stored.getGeneratedAt());
}
/**
 * Builds the question text for a topic by substituting its name and description into a
 * fixed template.
 *
 * @param topic topic whose name and description fill the two {@code %s} placeholders
 * @return the formatted question string
 */
private String buildQuestion(Topic topic) {
return String.format(
// NOTE(review): the next two literals are concatenated back to back, so the text opens with
// two near-identical sentences and only the second is followed by the topic name. This looks
// like an older wording left beside its replacement; confirm which one should remain.
"Please provide a comprehensive educational summary of the following neurosurgery topic: " +
"Provide a comprehensive educational summary of the following neurosurgery topic: " +
"%s. Topic description: %s. " +
"Include key concepts, clinical considerations, and important details that a neurosurgeon should know.",
topic.getName(), topic.getDescription()
);
}
private List<TopicSummaryResponse.SourceReference> extractSources(ChatResponse response) {
List<TopicSummaryResponse.SourceReference> sources = new ArrayList<>();
/**
 * Assembles the user prompt from the retrieved material and the question.
 *
 * The text is built in up to three parts:
 * - "CONTEXT:" with one entry per section, labelled [S1], [S2], ... and carrying the section
 *   title, start page and full text (only when sections are present);
 * - "AVAILABLE FIGURES:" with one line per figure, labelled [F1], [F2], ... and carrying the
 *   figure label (or "Figure" when null), page and caption (or empty when null), only when
 *   figures are present;
 * - "QUESTION:" followed by the question, always.
 *
 * Labels are 1-based positions in the lists passed in.
 *
 * NOTE(review): the statements that read {@code response} and add to {@code sources} use names
 * that are not parameters or locals of this method. They look like residue from a different
 * method interleaved here; confirm and remove if so.
 *
 * @param question question text appended after the context
 * @param sections sections rendered under CONTEXT
 * @param figures figures rendered under AVAILABLE FIGURES
 * @return the assembled prompt text
 */
private String buildContextPrompt(String question,
List<SectionEntity> sections,
List<FigureEntity> figures) {
StringBuilder sb = new StringBuilder();
if (response.getMetadata() != null) {
Object retrieved = response.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS);
if (retrieved instanceof List<?> docs) {
for (Object docObj : docs) {
if (docObj instanceof Document doc) {
Map<String, Object> metadata = doc.getMetadata();
String bookTitle = (String) metadata.get("book_title");
Object pageObj = metadata.get("page_number");
Integer page = pageObj instanceof Number n ? n.intValue() : null;
if (bookTitle != null) {
sources.add(new TopicSummaryResponse.SourceReference(bookTitle, page));
}
}
}
if (!sections.isEmpty()) {
sb.append("CONTEXT:\n\n");
for (int i = 0; i < sections.size(); i++) {
SectionEntity s = sections.get(i);
// Header line: reference label, section title and start page; the full text follows.
sb.append("[S").append(i + 1).append("] ")
.append(s.getTitle()).append(", p.").append(s.getPageStart()).append("\n");
sb.append(s.getFullText()).append("\n\n");
}
}
// Deduplicate by bookTitle + page
if (!figures.isEmpty()) {
sb.append("AVAILABLE FIGURES:\n");
for (int i = 0; i < figures.size(); i++) {
FigureEntity f = figures.get(i);
// A null label is shown as "Figure"; a null caption is shown as an empty string.
sb.append("[F").append(i + 1).append("] ")
.append(f.getLabel() != null ? f.getLabel() : "Figure")
.append(" (p.").append(f.getPage()).append("): ")
.append(f.getCaption() != null ? f.getCaption() : "")
.append("\n");
}
sb.append("\n");
}
sb.append("QUESTION:\n").append(question);
return sb.toString();
}
/**
 * Builds the de-duplicated source references for the retrieved sections and figures.
 *
 * <p>Each section contributes a reference at its start page and each figure one at its page.
 * The owning book is resolved among {@code readyBooks} by id; when none matches, the reference
 * carries a {@code null} book id and the placeholder title "Book". Section references come
 * first, then figure references, and duplicates are dropped with {@code distinct()}.
 *
 * @param sections retrieved sections
 * @param figures retrieved figures
 * @param readyBooks books searched when resolving the owner of a section or figure
 * @return the distinct references in encounter order
 */
private List<TopicSummaryResponse.SourceReference> buildSources(List<SectionEntity> sections,
        List<FigureEntity> figures,
        List<Book> readyBooks) {
    List<TopicSummaryResponse.SourceReference> collected = new ArrayList<>();

    for (SectionEntity section : sections) {
        Book owner = findOwningBook(readyBooks, section.getBookId());
        String title = owner == null ? "Book" : owner.getTitle();
        String bookId = owner == null ? null : owner.getId().toString();
        collected.add(new TopicSummaryResponse.SourceReference(bookId, title, section.getPageStart()));
    }

    for (FigureEntity figure : figures) {
        Book owner = findOwningBook(readyBooks, figure.getBookId());
        String title = owner == null ? "Book" : owner.getTitle();
        String bookId = owner == null ? null : owner.getId().toString();
        collected.add(new TopicSummaryResponse.SourceReference(bookId, title, figure.getPage()));
    }

    return collected.stream().distinct().toList();
}

/** Returns the first book whose id equals {@code bookId}, or {@code null} when none does. */
private static Book findOwningBook(List<Book> readyBooks, Object bookId) {
    for (Book candidate : readyBooks) {
        if (candidate.getId().equals(bookId)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Encodes the source references as JSON for storage.
 *
 * <p>Encoding is best-effort: if the mapper fails, the failure is logged at warn level and the
 * text of an empty JSON array is returned instead.
 *
 * @param sources references to encode
 * @return the JSON text, or {@code "[]"} when encoding fails
 */
private String serializeSources(List<TopicSummaryResponse.SourceReference> sources) {
    String encoded;
    try {
        encoded = objectMapper.writeValueAsString(sources);
    } catch (JsonProcessingException ex) {
        log.warn("Failed to serialize sources, storing empty array", ex);
        encoded = "[]";
    }
    return encoded;
}
/**
 * Decodes the persisted sources JSON back into source references.
 *
 * <p>Decoding is best-effort: a missing value, a JSON {@code null} literal, or malformed JSON
 * all yield an empty list instead of failing the caller.
 *
 * @param json JSON array text as produced by {@code serializeSources}; may be {@code null}
 * @return the decoded references, or an empty list when nothing usable is stored
 */
private List<TopicSummaryResponse.SourceReference> deserializeSources(String json) {
    if (json == null) {
        // readValue rejects null content with an IllegalArgumentException, which the catch
        // below does not intercept; treat a missing value like an empty source list.
        return List.of();
    }
    try {
        List<TopicSummaryResponse.SourceReference> decoded = objectMapper.readValue(json,
                objectMapper.getTypeFactory().constructCollectionType(
                        List.class, TopicSummaryResponse.SourceReference.class));
        // A stored JSON literal null decodes to a null list; never hand null to callers.
        return decoded != null ? decoded : List.of();
    } catch (JsonProcessingException e) {
        log.warn("Failed to deserialize sources from stored JSON", e);
        return List.of();
    }
}
}