enhance rag retrieval + summary
This commit is contained in:
@@ -1,21 +1,25 @@
|
||||
package com.aiteacher.topic;
|
||||
|
||||
import com.aiteacher.book.Book;
|
||||
import com.aiteacher.book.BookRepository;
|
||||
import com.aiteacher.book.BookStatus;
|
||||
import com.aiteacher.book.NoKnowledgeSourceException;
|
||||
import com.aiteacher.document.FigureEntity;
|
||||
import com.aiteacher.document.SectionEntity;
|
||||
import com.aiteacher.retrieval.NeurosurgeryRetriever;
|
||||
import com.aiteacher.retrieval.RetrievalResult;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.ai.chat.client.ChatClient;
|
||||
import org.springframework.ai.chat.client.advisor.vectorstore.QuestionAnswerAdvisor;
|
||||
import org.springframework.ai.chat.model.ChatResponse;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.vectorstore.VectorStore;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.UUID;
|
||||
|
||||
@Service
|
||||
public class TopicSummaryService {
|
||||
@@ -29,80 +33,190 @@ public class TopicSummaryService {
|
||||
|
||||
When answering:
|
||||
- Structure your response clearly with key points
|
||||
- If the context mentions specific book titles and page numbers, reference them
|
||||
- Cite claims using ONLY the reference labels provided in the context (e.g. [S1], [F2]).
|
||||
Do not invent page numbers, section titles, or labels not present in the CONTEXT block.
|
||||
- If the retrieved context does not contain sufficient information on the topic,
|
||||
explicitly state: "The uploaded books do not contain sufficient information on this topic."
|
||||
- Never hallucinate or fabricate clinical information
|
||||
""";
|
||||
|
||||
// Collaborators; all are assigned once in the constructor.
private final ChatClient chatClient;
private final BookRepository bookRepository;
private final NeurosurgeryRetriever retriever;
private final TopicSummaryRepository summaryRepository;
private final ObjectMapper objectMapper;

/**
 * Creates the service with the collaborators used for retrieval-backed summaries.
 *
 * @param chatClient        client used to send the system and user prompts to the chat model
 * @param bookRepository    repository used to find the books that can serve as knowledge sources
 * @param retriever         retriever queried once per ready book for sections and figures
 * @param summaryRepository repository used to store and load generated summaries
 * @param objectMapper      mapper used to (de)serialize the source references stored with a summary
 */
public TopicSummaryService(ChatClient chatClient,
                           BookRepository bookRepository,
                           NeurosurgeryRetriever retriever,
                           TopicSummaryRepository summaryRepository,
                           ObjectMapper objectMapper) {
    this.chatClient = chatClient;
    this.bookRepository = bookRepository;
    this.retriever = retriever;
    this.summaryRepository = summaryRepository;
    this.objectMapper = objectMapper;
}
|
||||
|
||||
public TopicSummaryResponse generateSummary(Topic topic) {
|
||||
if (!bookRepository.existsByStatus(BookStatus.READY)) {
|
||||
List<Book> readyBooks = bookRepository.findAll().stream()
|
||||
.filter(b -> b.getStatus() == BookStatus.READY)
|
||||
.toList();
|
||||
|
||||
if (readyBooks.isEmpty()) {
|
||||
throw new NoKnowledgeSourceException(
|
||||
"No books are available as knowledge sources. Please upload and process at least one book.");
|
||||
}
|
||||
|
||||
String question = buildQuestion(topic);
|
||||
|
||||
ChatResponse response = chatClient.prompt()
|
||||
.system(SYSTEM_PROMPT)
|
||||
.advisors(QuestionAnswerAdvisor.builder(vectorStore).build())
|
||||
.user(question)
|
||||
.call()
|
||||
.chatResponse();
|
||||
List<SectionEntity> allSections = new ArrayList<>();
|
||||
List<FigureEntity> allFigures = new ArrayList<>();
|
||||
for (Book book : readyBooks) {
|
||||
RetrievalResult result = retriever.retrieve(question, book.getId());
|
||||
allSections.addAll(result.parentSections());
|
||||
allFigures.addAll(result.figures());
|
||||
}
|
||||
|
||||
String summary = response.getResult().getOutput().getText();
|
||||
List<TopicSummaryResponse.SourceReference> sources = extractSources(response);
|
||||
log.debug("Topic summary for '{}': {} sections, {} figures retrieved",
|
||||
topic.getName(), allSections.size(), allFigures.size());
|
||||
|
||||
String contextPrompt = buildContextPrompt(question, allSections, allFigures);
|
||||
String summary = chatClient.prompt()
|
||||
.system(SYSTEM_PROMPT)
|
||||
.user(contextPrompt)
|
||||
.call()
|
||||
.content();
|
||||
|
||||
List<TopicSummaryResponse.SourceReference> sources = buildSources(allSections, allFigures, readyBooks);
|
||||
Instant generatedAt = Instant.now();
|
||||
|
||||
int summaryNumber = (int) summaryRepository.countByTopicId(topic.getId()) + 1;
|
||||
String sourcesJson = serializeSources(sources);
|
||||
TopicSummaryEntity entity = new TopicSummaryEntity(
|
||||
topic.getId(), summaryNumber, summary, sourcesJson, generatedAt);
|
||||
entity = summaryRepository.save(entity);
|
||||
|
||||
return new TopicSummaryResponse(
|
||||
entity.getId(),
|
||||
summaryNumber,
|
||||
topic.getId(),
|
||||
topic.getName(),
|
||||
summary,
|
||||
sources,
|
||||
Instant.now()
|
||||
generatedAt
|
||||
);
|
||||
}
|
||||
|
||||
public List<SavedSummaryItem> listSummaries(String topicId) {
|
||||
return summaryRepository.findByTopicIdOrderBySummaryNumberAsc(topicId).stream()
|
||||
.map(e -> new SavedSummaryItem(e.getId(), e.getSummaryNumber(), e.getGeneratedAt()))
|
||||
.toList();
|
||||
}
|
||||
|
||||
public TopicSummaryResponse getSummary(UUID summaryId) {
|
||||
TopicSummaryEntity entity = summaryRepository.findById(summaryId)
|
||||
.orElseThrow(() -> new NoSuchElementException("Summary not found."));
|
||||
|
||||
List<TopicSummaryResponse.SourceReference> sources = deserializeSources(entity.getSourcesJson());
|
||||
|
||||
return new TopicSummaryResponse(
|
||||
entity.getId(),
|
||||
entity.getSummaryNumber(),
|
||||
entity.getTopicId(),
|
||||
entity.getTopicId(),
|
||||
entity.getSummary(),
|
||||
sources,
|
||||
entity.getGeneratedAt()
|
||||
);
|
||||
}
|
||||
|
||||
private String buildQuestion(Topic topic) {
|
||||
return String.format(
|
||||
"Please provide a comprehensive educational summary of the following neurosurgery topic: " +
|
||||
"Provide a comprehensive educational summary of the following neurosurgery topic: " +
|
||||
"%s. Topic description: %s. " +
|
||||
"Include key concepts, clinical considerations, and important details that a neurosurgeon should know.",
|
||||
topic.getName(), topic.getDescription()
|
||||
);
|
||||
}
|
||||
|
||||
private List<TopicSummaryResponse.SourceReference> extractSources(ChatResponse response) {
|
||||
List<TopicSummaryResponse.SourceReference> sources = new ArrayList<>();
|
||||
private String buildContextPrompt(String question,
|
||||
List<SectionEntity> sections,
|
||||
List<FigureEntity> figures) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
if (response.getMetadata() != null) {
|
||||
Object retrieved = response.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS);
|
||||
if (retrieved instanceof List<?> docs) {
|
||||
for (Object docObj : docs) {
|
||||
if (docObj instanceof Document doc) {
|
||||
Map<String, Object> metadata = doc.getMetadata();
|
||||
String bookTitle = (String) metadata.get("book_title");
|
||||
Object pageObj = metadata.get("page_number");
|
||||
Integer page = pageObj instanceof Number n ? n.intValue() : null;
|
||||
if (bookTitle != null) {
|
||||
sources.add(new TopicSummaryResponse.SourceReference(bookTitle, page));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!sections.isEmpty()) {
|
||||
sb.append("CONTEXT:\n\n");
|
||||
for (int i = 0; i < sections.size(); i++) {
|
||||
SectionEntity s = sections.get(i);
|
||||
sb.append("[S").append(i + 1).append("] ")
|
||||
.append(s.getTitle()).append(", p.").append(s.getPageStart()).append("\n");
|
||||
sb.append(s.getFullText()).append("\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate by bookTitle + page
|
||||
if (!figures.isEmpty()) {
|
||||
sb.append("AVAILABLE FIGURES:\n");
|
||||
for (int i = 0; i < figures.size(); i++) {
|
||||
FigureEntity f = figures.get(i);
|
||||
sb.append("[F").append(i + 1).append("] ")
|
||||
.append(f.getLabel() != null ? f.getLabel() : "Figure")
|
||||
.append(" (p.").append(f.getPage()).append("): ")
|
||||
.append(f.getCaption() != null ? f.getCaption() : "")
|
||||
.append("\n");
|
||||
}
|
||||
sb.append("\n");
|
||||
}
|
||||
|
||||
sb.append("QUESTION:\n").append(question);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private List<TopicSummaryResponse.SourceReference> buildSources(List<SectionEntity> sections,
|
||||
List<FigureEntity> figures,
|
||||
List<Book> readyBooks) {
|
||||
List<TopicSummaryResponse.SourceReference> sources = new ArrayList<>();
|
||||
|
||||
for (SectionEntity s : sections) {
|
||||
Book book = readyBooks.stream()
|
||||
.filter(b -> b.getId().equals(s.getBookId()))
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
String title = book != null ? book.getTitle() : "Book";
|
||||
String bookId = book != null ? book.getId().toString() : null;
|
||||
sources.add(new TopicSummaryResponse.SourceReference(bookId, title, s.getPageStart()));
|
||||
}
|
||||
|
||||
for (FigureEntity f : figures) {
|
||||
Book book = readyBooks.stream()
|
||||
.filter(b -> b.getId().equals(f.getBookId()))
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
String title = book != null ? book.getTitle() : "Book";
|
||||
String bookId = book != null ? book.getId().toString() : null;
|
||||
sources.add(new TopicSummaryResponse.SourceReference(bookId, title, f.getPage()));
|
||||
}
|
||||
|
||||
return sources.stream().distinct().toList();
|
||||
}
|
||||
|
||||
private String serializeSources(List<TopicSummaryResponse.SourceReference> sources) {
|
||||
try {
|
||||
return objectMapper.writeValueAsString(sources);
|
||||
} catch (JsonProcessingException e) {
|
||||
log.warn("Failed to serialize sources, storing empty array", e);
|
||||
return "[]";
|
||||
}
|
||||
}
|
||||
|
||||
private List<TopicSummaryResponse.SourceReference> deserializeSources(String json) {
|
||||
try {
|
||||
return objectMapper.readValue(json,
|
||||
objectMapper.getTypeFactory().constructCollectionType(
|
||||
List.class, TopicSummaryResponse.SourceReference.class));
|
||||
} catch (JsonProcessingException e) {
|
||||
log.warn("Failed to deserialize sources from stored JSON", e);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user