first implementation - image/drawing integration

2026-04-04 12:56:56 +02:00
parent fc5b22fba1
commit 5acfdd33c1
42 changed files with 2854 additions and 151 deletions
@@ -1,5 +1,7 @@
 package com.aiteacher.book;

+import com.aiteacher.document.FigureEntity;
+import com.aiteacher.document.FigureRepository;
 import org.springframework.http.HttpStatus;
 import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.*;
@@ -15,9 +17,11 @@ import java.util.UUID;
 public class BookController {

    private final BookService bookService;
+    private final FigureRepository figureRepository;

-    public BookController(BookService bookService) {
+    public BookController(BookService bookService, FigureRepository figureRepository) {
        this.bookService = bookService;
+        this.figureRepository = figureRepository;
    }

    @PostMapping(consumes = "multipart/form-data")
@@ -46,6 +50,36 @@ public class BookController {
        return ResponseEntity.noContent().build();
    }

+    /**
+     * Starts a re-embedding run for an existing book by delegating to
+     * {@code bookService.reembed(id)}.
+     *
+     * @param id identifier of the book to re-embed
+     * @return 202 Accepted carrying the book id and the {@code PROCESSING} status name
+     */
+    @PostMapping("/{id}/reembed")
+    public ResponseEntity<Map<String, Object>> reembed(@PathVariable UUID id) {
+        Book reembedded = bookService.reembed(id);
+        // The reported status is always PROCESSING, independent of the returned entity's state.
+        Map<String, Object> payload = Map.of(
+            "bookId", reembedded.getId(),
+            "status", BookStatus.PROCESSING.name()
+        );
+        return ResponseEntity.accepted().body(payload);
+    }
+
+    /**
+     * Lists the figures stored for a book.
+     *
+     * @param id identifier of the book
+     * @return 200 OK with one {@link FigureResponse} per figure found for the book
+     */
+    @GetMapping("/{id}/figures")
+    public ResponseEntity<List<FigureResponse>> figures(@PathVariable UUID id) {
+        // Result is intentionally unused; the lookup is here so the request fails (404 if not found).
+        bookService.getById(id);
+
+        List<FigureResponse> body = figureRepository.findAllByBookId(id)
+            .stream()
+            .map(figure -> toFigureResponse(id, figure))
+            .toList();
+
+        return ResponseEntity.ok(body);
+    }
+
+    /**
+     * Converts a persisted figure into its API representation.
+     *
+     * <p>The image URL is {@code /api/v1/figures/{bookId}/{filename}}, where the filename is
+     * whatever follows the last {@code '/'} of the stored image path (the whole path when it
+     * contains no slash).
+     *
+     * @param bookId identifier of the owning book, used in the image URL
+     * @param f      figure entity to convert
+     * @return response object; its section title is always {@code null}
+     */
+    private FigureResponse toFigureResponse(UUID bookId, FigureEntity f) {
+        String storedPath = f.getImagePath();
+        int lastSlash = storedPath.lastIndexOf('/');
+        String filename = storedPath.substring(lastSlash + 1);
+        String imageUrl = "/api/v1/figures/" + bookId + "/" + filename;
+
+        // section title not eagerly loaded here
+        String sectionTitle = null;
+
+        return new FigureResponse(
+            f.getId(),
+            f.getLabel(),
+            f.getCaption(),
+            f.getFigureType().name(),
+            f.getPage(),
+            imageUrl,
+            f.getSectionId(),
+            sectionTitle
+        );
+    }
+
    private Map<String, Object> toSummaryResponse(Book book) {
        return Map.of(
            "id", book.getId(),
@@ -1,41 +1,75 @@
 package com.aiteacher.book;

+import com.aiteacher.document.*;
+import com.aiteacher.figure.FigureStorageService;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.ai.document.Document;
-import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
-import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
 import org.springframework.ai.vectorstore.VectorStore;
 import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
-import org.springframework.core.io.FileSystemResource;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;

 import java.nio.file.Path;
-import java.util.List;
-import java.util.UUID;
-import java.util.regex.Pattern;
+import java.time.Instant;
+import java.util.*;

@Service
 public class BookEmbeddingService {

    private static final Logger log = LoggerFactory.getLogger(BookEmbeddingService.class);

-    // Pattern to detect diagram/figure captions
-    private static final Pattern CAPTION_PATTERN =
-        Pattern.compile("^(Figure|Fig\\.|Table|Diagram)\\s+[\\d.]+", Pattern.CASE_INSENSITIVE);
-
    private final VectorStore vectorStore;
    private final BookRepository bookRepository;

-    public BookEmbeddingService(VectorStore vectorStore, BookRepository bookRepository) {
+    @Value("${app.embedding.batch-size:50}")
+    private int embeddingBatchSize;
+
+    @Value("${app.embedding.batch-delay-ms:1000}")
+    private long embeddingBatchDelayMs;
+    private final PdfStructureParser pdfStructureParser;
+    private final FigureExtractionService figureExtractionService;
+    private final VisionDescriptionService visionDescriptionService;
+    private final TextChunkingService textChunkingService;
+    private final ChunkFigureRefService chunkFigureRefService;
+    private final SectionRepository sectionRepository;
+    private final ChapterRepository chapterRepository;
+    private final FigureRepository figureRepository;
+    private final ChunkFigureRefRepository chunkFigureRefRepository;
+    private final FigureStorageService figureStorageService;
+
+    public BookEmbeddingService(
+            VectorStore vectorStore,
+            BookRepository bookRepository,
+            PdfStructureParser pdfStructureParser,
+            FigureExtractionService figureExtractionService,
+            VisionDescriptionService visionDescriptionService,
+            TextChunkingService textChunkingService,
+            ChunkFigureRefService chunkFigureRefService,
+            SectionRepository sectionRepository,
+            ChapterRepository chapterRepository,
+            FigureRepository figureRepository,
+            ChunkFigureRefRepository chunkFigureRefRepository,
+            FigureStorageService figureStorageService) {
        this.vectorStore = vectorStore;
        this.bookRepository = bookRepository;
+        this.pdfStructureParser = pdfStructureParser;
+        this.figureExtractionService = figureExtractionService;
+        this.visionDescriptionService = visionDescriptionService;
+        this.textChunkingService = textChunkingService;
+        this.chunkFigureRefService = chunkFigureRefService;
+        this.sectionRepository = sectionRepository;
+        this.chapterRepository = chapterRepository;
+        this.figureRepository = figureRepository;
+        this.chunkFigureRefRepository = chunkFigureRefRepository;
+        this.figureStorageService = figureStorageService;
    }

    @Async
    public void embedBook(UUID bookId, String bookTitle, Path pdfPath) {
-        log.info("Starting embedding for book {} ({})", bookId, bookTitle);
+        log.info("Starting image-aware embedding for book {} ({})", bookId, bookTitle);

        Book book = bookRepository.findById(bookId).orElse(null);
        if (book == null) {
@@ -47,29 +81,68 @@ public class BookEmbeddingService {
            book.setStatus(BookStatus.PROCESSING);
            bookRepository.save(book);

-            PagePdfDocumentReader reader = new PagePdfDocumentReader(
-                new FileSystemResource(pdfPath.toFile()),
-                PdfDocumentReaderConfig.builder()
-                    .withPagesPerDocument(1)
-                    .build()
-            );
+            // Step 1: Parse PDF into page-level sections persisted in Postgres
+            List<SectionEntity> sections = pdfStructureParser.parse(bookId, bookTitle, pdfPath);
+            String chapterId = bookId + "-ch1";

-            List<Document> pages = reader.get();
-            int pageCount = pages.size();
+            // Step 2: Build and embed text chunks for all sections in batches
+            List<Document> allChunks = new ArrayList<>();
+            for (SectionEntity section : sections) {
+                List<Document> chunks = textChunkingService.chunk(section, bookTitle);
+                allChunks.addAll(chunks);
+            }
+            embedInBatches(allChunks, bookId);
+            log.info("Embedded {} text chunks for book {}", allChunks.size(), bookId);

-            // Enrich metadata and tag diagram captions
-            List<Document> enriched = pages.stream()
-                .map(doc -> enrichDocument(doc, bookId.toString(), bookTitle))
-                .toList();
+            // Step 3: Extract images from the PDF, save to file store, persist FigureEntity
+            List<FigureEntity> figures = figureExtractionService.extract(
+                bookId, chapterId, sections, pdfPath);

-            vectorStore.add(enriched);
+            // Step 4: For each figure, generate vision description and embed caption
+            for (FigureEntity figure : figures) {
+                Path imagePath = figureStorageService.resolve(figure.getImagePath());
+                String description = visionDescriptionService.describe(
+                    imagePath, figure.getCaption());
+
+                // Use description as caption fallback if no caption was detected
+                if (figure.getCaption() == null || figure.getCaption().isBlank()) {
+                    figure.setCaption(description);
+                    figureRepository.save(figure);
+                }
+
+                // Content for embedding = vision description + caption for maximum signal
+                String embeddingContent = description
+                    + (figure.getCaption() != null ? "\n" + figure.getCaption() : "");
+
+                String embeddingId = UUID.randomUUID().toString();
+                Map<String, Object> metadata = buildFigureMetadata(figure, bookTitle, embeddingId);
+                Document figureDoc = new Document(embeddingId, embeddingContent, metadata);
+                vectorStore.add(List.of(figureDoc));
+
+                figure.setCaptionEmbeddingId(UUID.fromString(embeddingId));
+                figureRepository.save(figure);
+            }
+            log.info("Embedded {} figure captions for book {}", figures.size(), bookId);
+
+            // Step 5: Link text chunks to figures via text references
+            for (SectionEntity section : sections) {
+                List<Document> sectionChunks = allChunks.stream()
+                    .filter(d -> section.getId().equals(d.getMetadata().get("section_id")))
+                    .toList();
+                List<FigureEntity> sectionFigures = figures.stream()
+                    .filter(f -> section.getId().equals(f.getSectionId()))
+                    .toList();
+                chunkFigureRefService.linkChunksToFigures(
+                    sectionChunks, sectionFigures, section.getPageStart());
+            }

            book.setStatus(BookStatus.READY);
-            book.setPageCount(pageCount);
-            book.setProcessedAt(java.time.Instant.now());
+            book.setPageCount(sections.size());
+            book.setProcessedAt(Instant.now());
            bookRepository.save(book);

-            log.info("Finished embedding book {} — {} pages", bookId, pageCount);
+            log.info("Finished embedding book {} — {} pages, {} figures",
+                bookId, sections.size(), figures.size());

        } catch (Exception ex) {
            log.error("Failed to embed book {}", bookId, ex);
@@ -79,40 +152,74 @@ public class BookEmbeddingService {
        }
    }

-    private Document enrichDocument(Document doc, String bookId, String bookTitle) {
-        String content = doc.getText();
-        String chunkType = detectChunkType(content);
+    @Transactional
+    public void deleteBookChunks(UUID bookId) {
+        log.info("Deleting all data for book {}", bookId);
+        try {
+            // Delete chunk-figure refs (by figureId for this book)
+            List<String> figureIds = figureRepository.findAllByBookId(bookId)
+                .stream().map(FigureEntity::getId).toList();
+            if (!figureIds.isEmpty()) {
+                chunkFigureRefRepository.deleteByFigureIdIn(figureIds);
+            }

-        doc.getMetadata().put("book_id", bookId);
-        doc.getMetadata().put("book_title", bookTitle);
-        doc.getMetadata().put("chunk_type", chunkType);
+            // Delete figures from Postgres
+            figureRepository.deleteAllByBookId(bookId);

-        return doc;
+            // Delete figure files from disk
+            figureStorageService.deleteAll(bookId);
+
+            // Delete sections and chapters from Postgres
+            sectionRepository.deleteAllByBookId(bookId);
+            chapterRepository.deleteAllByBookId(bookId);
+
+            // Delete vector store entries (text chunks + figure embeddings)
+            FilterExpressionBuilder b = new FilterExpressionBuilder();
+            vectorStore.delete(b.eq("book_id", bookId.toString()).build());
+
+        } catch (Exception ex) {
+            log.warn("Error during cleanup for book {}: {}", bookId, ex.getMessage());
+        }
    }

-    private String detectChunkType(String content) {
-        if (content != null) {
-            for (String line : content.split("\\r?\\n")) {
-                if (CAPTION_PATTERN.matcher(line.trim()).find()) {
-                    return "diagram";
+    private void embedInBatches(List<Document> docs, UUID bookId) {
+        int total = docs.size();
+        for (int i = 0; i < total; i += embeddingBatchSize) {
+            List<Document> batch = docs.subList(i, Math.min(i + embeddingBatchSize, total));
+            vectorStore.add(batch);
+            int batchNum = i / embeddingBatchSize + 1;
+            int totalBatches = (total - 1) / embeddingBatchSize + 1;
+            log.debug("Embedded batch {}/{} for book {}", batchNum, totalBatches, bookId);
+            if (i + embeddingBatchSize < total) {
+                try {
+                    Thread.sleep(embeddingBatchDelayMs);
+                } catch (InterruptedException e) {
+                    Thread.currentThread().interrupt();
+                    log.warn("Embedding batch sleep interrupted for book {}", bookId);
                }
            }
        }
-        return "text";
    }

-    public void deleteBookChunks(UUID bookId) {
-        log.info("Deleting vector chunks for book {}", bookId);
-        try {
-            FilterExpressionBuilder b = new FilterExpressionBuilder();
-            vectorStore.delete(b.eq("book_id", bookId.toString()).build());
-        } catch (Exception ex) {
-            log.warn("Could not delete vector chunks for book {}: {}", bookId, ex.getMessage());
-        }
+    private Map<String, Object> buildFigureMetadata(FigureEntity figure, String bookTitle,
+                                                     String embeddingId) {
+        Map<String, Object> m = new HashMap<>();
+        m.put("type", "FIGURE");
+        m.put("book_id", figure.getBookId().toString());
+        m.put("book_title", bookTitle);
+        m.put("chapter_id", figure.getChapterId() != null ? figure.getChapterId() : "");
+        m.put("section_id", figure.getSectionId() != null ? figure.getSectionId() : "");
+        m.put("figure_id", figure.getId());
+        m.put("figure_type", figure.getFigureType().name());
+        m.put("image_path", figure.getImagePath());
+        m.put("label", figure.getLabel() != null ? figure.getLabel() : "");
+        m.put("page", figure.getPage());
+        m.put("embedding_id", embeddingId);
+        return m;
    }

-    private String truncate(String message, int maxLength) {
-        if (message == null) return null;
-        return message.length() <= maxLength ? message : message.substring(0, maxLength);
+    private String truncate(String msg, int max) {
+        if (msg == null) return null;
+        return msg.length() <= max ? msg : msg.substring(0, max);
    }
 }
@@ -1,11 +1,13 @@
 package com.aiteacher.book;

+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
 import org.springframework.web.multipart.MultipartFile;

 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.List;
 import java.util.NoSuchElementException;
 import java.util.UUID;
@@ -15,10 +17,15 @@ public class BookService {

    private final BookRepository bookRepository;
    private final BookEmbeddingService bookEmbeddingService;
+    private final Path bookStoragePath;

-    public BookService(BookRepository bookRepository, BookEmbeddingService bookEmbeddingService) {
+    public BookService(
+            BookRepository bookRepository,
+            BookEmbeddingService bookEmbeddingService,
+            @Value("${app.figure-storage.base-path:./uploads}") String basePath) {
        this.bookRepository = bookRepository;
        this.bookEmbeddingService = bookEmbeddingService;
+        this.bookStoragePath = Paths.get(basePath).toAbsolutePath().normalize().resolve("books");
    }

    public Book upload(MultipartFile file) throws IOException {
@@ -28,20 +35,35 @@ public class BookService {
        }

        String title = deriveTitle(originalFilename);
-
        Book book = new Book(title, originalFilename, file.getSize());
        book = bookRepository.save(book);

-        // Write to a temp file so the async task can read it
-        Path tempFile = Files.createTempFile("aiteacher-", "-" + book.getId() + ".pdf");
-        file.transferTo(tempFile.toFile());
+        // Persist PDF in a stable location for potential re-embedding
+        Files.createDirectories(bookStoragePath);
+        Path pdfPath = bookStoragePath.resolve(book.getId() + ".pdf");
+        file.transferTo(pdfPath.toFile());

        UUID bookId = book.getId();
-        Path pdfPath = tempFile;
-        String bookTitle = title;
+        bookEmbeddingService.embedBook(bookId, title, pdfPath);
+        return book;
+    }

-        bookEmbeddingService.embedBook(bookId, bookTitle, pdfPath);
+    public Book reembed(UUID id) {
+        Book book = bookRepository.findById(id)
+            .orElseThrow(() -> new NoSuchElementException("Book not found."));

+        if (book.getStatus() == BookStatus.PROCESSING) {
+            throw new IllegalStateException("Book is already being processed.");
+        }
+
+        Path pdfPath = bookStoragePath.resolve(id + ".pdf");
+        if (!Files.exists(pdfPath)) {
+            throw new IllegalStateException(
+                "Original PDF not found. Please re-upload the book before re-embedding.");
+        }
+
+        bookEmbeddingService.deleteBookChunks(id);
+        bookEmbeddingService.embedBook(id, book.getTitle(), pdfPath);
        return book;
    }

@@ -63,14 +85,21 @@ public class BookService {
        }

        bookEmbeddingService.deleteBookChunks(id);
+
+        // Delete the stored PDF
+        Path pdfPath = bookStoragePath.resolve(id + ".pdf");
+        try {
+            Files.deleteIfExists(pdfPath);
+        } catch (IOException ex) {
+            // Non-fatal — log only
+        }
+
        bookRepository.deleteById(id);
    }

    private String deriveTitle(String filename) {
-        // Strip .pdf extension and replace separators with spaces
        String name = filename.replaceAll("(?i)\\.pdf$", "");
        name = name.replaceAll("[-_]", " ");
-        // Capitalise first letter
        if (!name.isEmpty()) {
            name = Character.toUpperCase(name.charAt(0)) + name.substring(1);
        }
@@ -0,0 +1,12 @@
+package com.aiteacher.book;
+
+/**
+ * API representation of a figure belonging to a book.
+ *
+ * @param figureId     identifier of the figure
+ * @param label        figure label as stored on the entity
+ * @param caption      figure caption as stored on the entity
+ * @param figureType   name of the figure type enum constant
+ * @param page         page value recorded for the figure
+ * @param imageUrl     URL of the image; {@code BookController} builds it as
+ *                     {@code /api/v1/figures/{bookId}/{filename}}
+ * @param sectionId    identifier of the section the figure is attached to
+ * @param sectionTitle title of that section; {@code BookController} currently passes {@code null}
+ */
+public record FigureResponse(
+    String figureId,
+    String label,
+    String caption,
+    String figureType,
+    int page,
+    String imageUrl,
+    String sectionId,
+    String sectionTitle
+) {}
@@ -3,22 +3,16 @@ package com.aiteacher.chat;
 import com.aiteacher.book.BookRepository;
 import com.aiteacher.book.BookStatus;
 import com.aiteacher.book.NoKnowledgeSourceException;
+import com.aiteacher.document.FigureEntity;
+import com.aiteacher.document.SectionEntity;
+import com.aiteacher.retrieval.NeurosurgeryRetriever;
+import com.aiteacher.retrieval.RetrievalResult;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.ai.chat.client.ChatClient;
-import org.springframework.ai.chat.client.advisor.vectorstore.QuestionAnswerAdvisor;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.document.Document;
-import org.springframework.ai.vectorstore.SearchRequest;
-import org.springframework.ai.vectorstore.VectorStore;
 import org.springframework.stereotype.Service;

-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.UUID;
+import java.util.*;

@Service
 public class ChatService {
@@ -35,26 +29,28 @@ public class ChatService {
        - Build answers from what is present: procedures, conditions, techniques, and descriptions all contribute; combine them into a rich, structured response
        - Use clear structure: headings, bullet points, or numbered steps where appropriate to maximize clarity
        - Only say you cannot answer if the context is entirely unrelated to the question
-        - Cite sources for each major point (book title and page number from the context metadata)
+        - Cite sources for each major point (book title and page number from the context)
+        - When referencing diagrams or figures, cite them as [Fig. X, p.N]
        - Maintain continuity with the conversation history
        - Never fabricate clinical information not present in the context
        """;

    private final ChatClient chatClient;
-    private final VectorStore vectorStore;
    private final BookRepository bookRepository;
    private final ChatSessionRepository sessionRepository;
    private final MessageRepository messageRepository;
+    private final NeurosurgeryRetriever retriever;

-    public ChatService(ChatClient chatClient, VectorStore vectorStore,
+    public ChatService(ChatClient chatClient,
                       BookRepository bookRepository,
                       ChatSessionRepository sessionRepository,
-                       MessageRepository messageRepository) {
+                       MessageRepository messageRepository,
+                       NeurosurgeryRetriever retriever) {
        this.chatClient = chatClient;
-        this.vectorStore = vectorStore;
        this.bookRepository = bookRepository;
        this.sessionRepository = sessionRepository;
        this.messageRepository = messageRepository;
+        this.retriever = retriever;
    }

    public ChatSession createSession(String topicId) {
@@ -73,7 +69,11 @@ public class ChatService {
        ChatSession session = sessionRepository.findById(sessionId)
            .orElseThrow(() -> new NoSuchElementException("Session not found."));

-        if (!bookRepository.existsByStatus(BookStatus.READY)) {
+        List<com.aiteacher.book.Book> readyBooks = bookRepository.findAll().stream()
+            .filter(b -> b.getStatus() == BookStatus.READY)
+            .toList();
+
+        if (readyBooks.isEmpty()) {
            throw new NoKnowledgeSourceException("No books are available as knowledge sources.");
        }

@@ -81,27 +81,31 @@ public class ChatService {
        Message userMessage = new Message(sessionId, MessageRole.USER, userContent);
        messageRepository.save(userMessage);

-        // Build conversation history for context
+        // Build full question with conversation history
        List<Message> history = messageRepository.findBySessionIdOrderByCreatedAtAsc(sessionId);
-
-        // Build the prompt with full conversation history as context
        String fullQuestion = buildQuestionWithHistory(history, userContent, session.getTopicId());

-        var qaAdvisor = QuestionAnswerAdvisor.builder(vectorStore)
-        .searchRequest(SearchRequest.builder().similarityThreshold(0.5d).topK(6).build())
-        .build();
-        
-        ChatResponse response = chatClient.prompt()
-            .advisors(qaAdvisor)
+        // Retrieve context from all ready books (aggregate across books)
+        List<SectionEntity> allSections = new ArrayList<>();
+        List<FigureEntity> allFigures = new ArrayList<>();
+        for (com.aiteacher.book.Book book : readyBooks) {
+            RetrievalResult result = retriever.retrieve(fullQuestion, book.getId());
+            allSections.addAll(result.parentSections());
+            allFigures.addAll(result.figures());
+        }
+
+        // Build LLM prompt with section full texts and figure references
+        String contextPrompt = buildContextPrompt(fullQuestion, allSections, allFigures);
+
+        String assistantContent = chatClient.prompt()
            .system(SYSTEM_PROMPT)
-            .user(fullQuestion)
+            .user(contextPrompt)
            .call()
-            .chatResponse();
+            .content();

-        String assistantContent = response.getResult().getOutput().getText();
-        List<Map<String, Object>> sources = extractSources(response);
+        // Build sources list with TEXT and FIGURE entries
+        List<Map<String, Object>> sources = buildSources(allSections, allFigures);

-        // Persist assistant message
        Message assistantMessage = new Message(sessionId, MessageRole.ASSISTANT, assistantContent);
        assistantMessage.setSources(sources);
        return messageRepository.save(assistantMessage);
@@ -118,24 +122,95 @@ public class ChatService {
        sessionRepository.deleteById(sessionId);
    }

+    // -------------------------------------------------------------------------
+    // Private helpers
+    // -------------------------------------------------------------------------
+
+    /**
+     * Assembles the user prompt sent to the model: the retrieved section texts, then the list
+     * of available figures, then the question. A block is left out entirely when its input
+     * list is empty.
+     *
+     * @param question the question text, already combined with conversation history by the caller
+     * @param sections retrieved sections whose full text becomes the context
+     * @param figures  retrieved figures listed with label, page and caption
+     * @return the complete prompt text
+     */
+    private String buildContextPrompt(String question,
+                                      List<SectionEntity> sections,
+                                      List<FigureEntity> figures) {
+        StringBuilder prompt = new StringBuilder();
+
+        if (!sections.isEmpty()) {
+            prompt.append("CONTEXT:\n\n");
+            sections.forEach(section -> prompt
+                .append("[").append(section.getTitle())
+                .append(", p.").append(section.getPageStart()).append("]\n")
+                .append(section.getFullText()).append("\n\n"));
+        }
+
+        if (!figures.isEmpty()) {
+            prompt.append("AVAILABLE FIGURES:\n");
+            for (FigureEntity fig : figures) {
+                // Missing labels fall back to a generic word, missing captions to nothing.
+                String label = Objects.requireNonNullElse(fig.getLabel(), "Figure");
+                String caption = Objects.requireNonNullElse(fig.getCaption(), "");
+                prompt.append("- ").append(label)
+                    .append(" (p.").append(fig.getPage()).append("): ")
+                    .append(caption)
+                    .append("\n");
+            }
+            prompt.append("\nWhen referencing diagrams, cite them as [Fig. X, p.N].\n\n");
+        }
+
+        return prompt.append("QUESTION:\n").append(question).toString();
+    }
+
+    /**
+     * Builds the source list attached to an assistant message: one {@code TEXT} entry per
+     * section followed by one {@code FIGURE} entry per figure, in input order.
+     *
+     * <p>Book titles are resolved at most once per book within a single call, so several
+     * sections or figures of the same book cost one repository lookup instead of one each.
+     *
+     * @param sections retrieved sections; each becomes a TEXT source
+     * @param figures  retrieved figures; each becomes a FIGURE source
+     * @return list of source maps whose keys keep insertion order
+     */
+    private List<Map<String, Object>> buildSources(List<SectionEntity> sections,
+                                                    List<FigureEntity> figures) {
+        List<Map<String, Object>> sources = new ArrayList<>();
+        // Per-call cache: book id -> title, shared by the section and figure loops.
+        Map<UUID, String> titlesByBookId = new HashMap<>();
+
+        for (SectionEntity section : sections) {
+            Map<String, Object> source = new LinkedHashMap<>();
+            source.put("type", "TEXT");
+            source.put("bookTitle", titlesByBookId.computeIfAbsent(
+                section.getBookId(), bookId -> deriveTitleFromSection(section)));
+            source.put("page", section.getPageStart());
+            source.put("chunkText", truncate(section.getFullText(), 500));
+            sources.add(source);
+        }
+
+        for (FigureEntity figure : figures) {
+            Map<String, Object> source = new LinkedHashMap<>();
+            source.put("type", "FIGURE");
+            source.put("bookTitle", titlesByBookId.computeIfAbsent(
+                figure.getBookId(),
+                bookId -> bookRepository.findById(bookId)
+                    .map(com.aiteacher.book.Book::getTitle).orElse("Book")));
+            source.put("page", figure.getPage());
+            source.put("figureId", figure.getId());
+            source.put("label", figure.getLabel() != null ? figure.getLabel() : "");
+            source.put("caption", figure.getCaption() != null ? figure.getCaption() : "");
+            source.put("figureType", figure.getFigureType().name());
+            // imageUrl assembled from relative path: figures/{bookId}/{filename}
+            String filename = figure.getImagePath().substring(
+                figure.getImagePath().lastIndexOf('/') + 1);
+            source.put("imageUrl", "/api/v1/figures/" + figure.getBookId() + "/" + filename);
+            sources.add(source);
+        }
+
+        return sources;
+    }
+
+    /**
+     * Looks up the title of the book a section belongs to.
+     *
+     * @param section section whose book title is wanted; may be {@code null}
+     * @return the book's title, or {@code "Book"} when the section is {@code null} or no
+     *         title can be resolved
+     */
+    private String deriveTitleFromSection(SectionEntity section) {
+        if (section != null) {
+            return bookRepository.findById(section.getBookId())
+                .map(book -> book.getTitle())
+                .orElse("Book");
+        }
+        return "Book";
+    }
+
    private String buildQuestionWithHistory(List<Message> history, String currentQuestion,
                                            String topicId) {
        boolean hasTopic = topicId != null && !topicId.equals("free-form");

        if (history.size() <= 1) {
            return hasTopic
-                ? String.format("[Context: This is a question about the neurosurgery topic '%s']\n%s",
+                ? String.format("[Context: question about neurosurgery topic '%s']\n%s",
                    topicId, currentQuestion)
                : currentQuestion;
        }

        StringBuilder sb = new StringBuilder();
        if (hasTopic) {
-            sb.append(String.format("[Context: This conversation is about the neurosurgery topic '%s']\n\n",
-                topicId));
+            sb.append(String.format("[Context: conversation about '%s']\n\n", topicId));
        }
        sb.append("Previous conversation:\n");
-        // Include all messages except the last (which is the current user message just saved)
        for (int i = 0; i < history.size() - 1; i++) {
            Message msg = history.get(i);
            sb.append(msg.getRole().name()).append(": ").append(msg.getContent()).append("\n");
@@ -144,30 +219,8 @@ public class ChatService {
        return sb.toString();
    }

-    private List<Map<String, Object>> extractSources(ChatResponse response) {
-        List<Map<String, Object>> sources = new ArrayList<>();
-
-        if (response.getMetadata() != null) {
-            Object retrieved = response.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS);
-            if (retrieved instanceof List<?> docs) {
-                for (Object docObj : docs) {
-                    if (docObj instanceof Document doc) {
-                        Map<String, Object> metadata = doc.getMetadata();
-                        String bookTitle = (String) metadata.get("book_title");
-                        Object pageObj = metadata.get("page_number");
-                        Integer page = pageObj instanceof Number n ? n.intValue() : null;
-                        if (bookTitle != null) {
-                            Map<String, Object> source = new HashMap<>();
-                            source.put("bookTitle", bookTitle);
-                            source.put("page", page);
-                            source.put("chunkText", doc.getText());
-                            sources.add(source);
-                        }
-                    }
-                }
-            }
-        }
-
-        return sources;
+    private String truncate(String text, int maxChars) {
+        if (text == null) return "";
+        return text.length() <= maxChars ? text : text.substring(0, maxChars) + "…";
    }
 }
@@ -0,0 +1,25 @@
+package com.aiteacher.config;
+
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry;
+import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
+
+import java.nio.file.Paths;
+
+@Configuration
+public class FigureStorageConfig implements WebMvcConfigurer {
+
+    private final String basePath;
+
+    public FigureStorageConfig(@Value("${app.figure-storage.base-path:./uploads}") String basePath) {
+        this.basePath = Paths.get(basePath).toAbsolutePath().normalize().toString();
+    }
+
+    @Override
+    public void addResourceHandlers(ResourceHandlerRegistry registry) {
+        // Serve GET /api/v1/figures/** from the local file store
+        registry.addResourceHandler("/api/v1/figures/**")
+                .addResourceLocations("file:" + basePath + "/figures/");
+    }
+}
@@ -0,0 +1,47 @@
+package com.aiteacher.document;
+
+import jakarta.persistence.*;
+import java.time.Instant;
+import java.util.UUID;
+
+/**
+ * JPA entity mapped to the {@code chapter} table: one chapter of a book.
+ *
+ * <p>The primary key is an application-assigned string (no generator is declared).
+ * The class exposes getters only; state is set through the constructor.
+ */
+@Entity
+@Table(name = "chapter")
+public class ChapterEntity {
+
+    // Application-assigned identifier, at most 200 characters.
+    @Id
+    @Column(name = "id", length = 200)
+    private String id;
+
+    // Id of the owning book; stored as a plain column, no JPA relationship is declared.
+    @Column(name = "book_id", nullable = false)
+    private UUID bookId;
+
+    @Column(name = "number", nullable = false)
+    private int number;
+
+    @Column(name = "title", length = 500)
+    private String title;
+
+    // Nullable: declared as Integer and the column has no nullable = false.
+    @Column(name = "page_start")
+    private Integer pageStart;
+
+    @Column(name = "created_at", nullable = false)
+    private Instant createdAt;
+
+    /** No-argument constructor required by JPA. */
+    public ChapterEntity() {}
+
+    /**
+     * Creates a chapter and stamps {@code createdAt} with the current instant.
+     *
+     * @param id        application-assigned primary key
+     * @param bookId    id of the owning book
+     * @param number    chapter number
+     * @param title     chapter title, may be {@code null}
+     * @param pageStart first page of the chapter, may be {@code null}
+     */
+    public ChapterEntity(String id, UUID bookId, int number, String title, Integer pageStart) {
+        this.id = id;
+        this.bookId = bookId;
+        this.number = number;
+        this.title = title;
+        this.pageStart = pageStart;
+        this.createdAt = Instant.now();
+    }
+
+    public String getId() { return id; }
+    public UUID getBookId() { return bookId; }
+    public int getNumber() { return number; }
+    public String getTitle() { return title; }
+    public Integer getPageStart() { return pageStart; }
+    public Instant getCreatedAt() { return createdAt; }
+}
@@ -0,0 +1,9 @@
+package com.aiteacher.document;
+
+import org.springframework.data.jpa.repository.JpaRepository;
+
+import java.util.UUID;
+
+/**
+ * Spring Data repository for {@link ChapterEntity} rows, keyed by the chapter's string id.
+ */
+public interface ChapterRepository extends JpaRepository<ChapterEntity, String> {
+
+    /**
+     * Deletes every chapter that belongs to the given book.
+     *
+     * <p>Query methods declared on a repository interface do not inherit the transaction
+     * settings of the built-in CRUD methods, and a derived delete needs an active
+     * transaction. Declaring it here makes the method safe to call from non-transactional
+     * code; a transaction already opened by the caller is simply joined.
+     *
+     * @param bookId id of the book whose chapters are removed
+     */
+    @org.springframework.transaction.annotation.Transactional
+    void deleteAllByBookId(UUID bookId);
+}
@@ -0,0 +1,58 @@
+package com.aiteacher.document;
+
+import jakarta.persistence.*;
+import java.io.Serializable;
+import java.util.Objects;
+import java.util.UUID;
+
+/**
+ * JPA entity mapped to the {@code chunk_figure_ref} table: a link between one text chunk
+ * and one figure.
+ *
+ * <p>The primary key is the composite {@code (chunkId, figureId)}, declared through
+ * {@link IdClass} with {@link PK}. Both ids are stored as plain columns; no JPA
+ * relationship to other entities is declared.
+ */
+@Entity
+@Table(name = "chunk_figure_ref")
+@IdClass(ChunkFigureRefEntity.PK.class)
+public class ChunkFigureRefEntity {
+
+    @Id
+    @Column(name = "chunk_id", nullable = false)
+    private UUID chunkId;
+
+    @Id
+    @Column(name = "figure_id", nullable = false, length = 200)
+    private String figureId;
+
+    // Nullable page number associated with the mention.
+    @Column(name = "mention_page")
+    private Integer mentionPage;
+
+    /** No-argument constructor required by JPA. */
+    public ChunkFigureRefEntity() {}
+
+    /**
+     * @param chunkId     id of the referencing chunk
+     * @param figureId    id of the referenced figure
+     * @param mentionPage page of the mention, may be {@code null}
+     */
+    public ChunkFigureRefEntity(UUID chunkId, String figureId, Integer mentionPage) {
+        this.chunkId = chunkId;
+        this.figureId = figureId;
+        this.mentionPage = mentionPage;
+    }
+
+    public UUID getChunkId() { return chunkId; }
+    public String getFigureId() { return figureId; }
+    public Integer getMentionPage() { return mentionPage; }
+
+    /**
+     * Composite-key class for {@link ChunkFigureRefEntity}. Its field names mirror the
+     * entity's {@code @Id} fields, and equality is value-based over both components.
+     */
+    public static class PK implements Serializable {
+        private UUID chunkId;
+        private String figureId;
+
+        public PK() {}
+        public PK(UUID chunkId, String figureId) {
+            this.chunkId = chunkId;
+            this.figureId = figureId;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (!(o instanceof PK pk)) return false;
+            return Objects.equals(chunkId, pk.chunkId) && Objects.equals(figureId, pk.figureId);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(chunkId, figureId);
+        }
+    }
+}
@@ -0,0 +1,18 @@
+package com.aiteacher.document;
+
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.data.jpa.repository.Query;
+import org.springframework.data.repository.query.Param;
+
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Spring Data repository for {@link ChunkFigureRefEntity} links, keyed by the composite
+ * {@link ChunkFigureRefEntity.PK}.
+ */
+public interface ChunkFigureRefRepository extends JpaRepository<ChunkFigureRefEntity, ChunkFigureRefEntity.PK> {
+
+    /**
+     * Returns all links whose chunk id is contained in {@code chunkIds}.
+     *
+     * @param chunkIds chunk ids to look up; callers should pass a non-empty list, since an
+     *                 empty {@code IN} list is not portable across databases
+     */
+    @Query("SELECT r FROM ChunkFigureRefEntity r WHERE r.chunkId IN :chunkIds")
+    List<ChunkFigureRefEntity> findByChunkIdIn(@Param("chunkIds") List<UUID> chunkIds);
+
+    /**
+     * Bulk-deletes all links that point at any of the given figures.
+     *
+     * <p>A modifying query must run inside a transaction, and declared query methods do not
+     * get one by default, so it is declared here. A transaction already opened by the
+     * caller is joined. Being a bulk JPQL statement, it bypasses the persistence context.
+     *
+     * @param figureIds figure ids whose links are removed; should be non-empty
+     */
+    @org.springframework.data.jpa.repository.Modifying
+    @org.springframework.transaction.annotation.Transactional
+    @Query("DELETE FROM ChunkFigureRefEntity r WHERE r.figureId IN :figureIds")
+    void deleteByFigureIdIn(@Param("figureIds") List<String> figureIds);
+}
@@ -0,0 +1,62 @@
+package com.aiteacher.document;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.document.Document;
+import org.springframework.stereotype.Service;
+
+import java.util.List;
+import java.util.UUID;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Scans chunk text for "Fig. X" and "Figure X" references and persists
+ * ChunkFigureRefEntity rows linking that chunk to its referenced figures.
+ *
+ * <p>A reference and a figure label are considered the same figure when their numeric parts
+ * are equal after trailing separators have been removed (so "Fig. 12." at the end of a
+ * sentence still refers to the figure labelled "Fig. 12", but "Fig. 2" does not refer to
+ * "Fig. 12").
+ */
+@Service
+public class ChunkFigureRefService {
+
+    private static final Logger log = LoggerFactory.getLogger(ChunkFigureRefService.class);
+
+    // Matches: "Fig. 12-4", "Fig. 12.4", "Fig 12", "Figure 12-4", etc.
+    private static final Pattern REF_PATTERN =
+        Pattern.compile("(?i)\\b(Fig\\.?|Figure)\\s+(\\d+[\\-.\\d]*)");
+
+    // Numeric part at the end of a figure label, e.g. "12-4" in "Fig. 12-4".
+    private static final Pattern LABEL_NUMBER_PATTERN =
+        Pattern.compile("(\\d+[\\-.\\d]*)\\s*$");
+
+    private final ChunkFigureRefRepository refRepository;
+
+    public ChunkFigureRefService(ChunkFigureRefRepository refRepository) {
+        this.refRepository = refRepository;
+    }
+
+    /**
+     * For each text chunk, finds figure references and persists ChunkFigureRefEntity rows.
+     *
+     * <p>Chunks whose id is not a UUID are skipped with a warning; chunks without text are
+     * skipped silently. When several figures share a label number, the first one in
+     * {@code bookFigures} wins. Each (chunk, figure) pair is saved at most once per call.
+     *
+     * @param chunks      chunks to scan; {@code null} or empty means nothing to do
+     * @param bookFigures candidate figures of the same book; {@code null} or empty means
+     *                    nothing to do
+     * @param pageNum     page number recorded as the mention page of every created link
+     */
+    public void linkChunksToFigures(List<Document> chunks, List<FigureEntity> bookFigures,
+                                    int pageNum) {
+        if (chunks == null || bookFigures == null || bookFigures.isEmpty()) return;
+
+        // Fully qualified collection types keep this block independent of the import list.
+        java.util.Map<String, String> figureIdByNumber = indexByLabelNumber(bookFigures);
+        if (figureIdByNumber.isEmpty()) return;
+
+        for (Document chunk : chunks) {
+            UUID chunkId = parseChunkId(chunk.getId());
+            String text = chunk.getText();
+            if (chunkId == null || text == null) continue;
+
+            java.util.Set<String> linkedFigureIds = new java.util.HashSet<>();
+            Matcher m = REF_PATTERN.matcher(text);
+            while (m.find()) {
+                String figureId = figureIdByNumber.get(stripTrailingSeparators(m.group(2)));
+                // Set.add is false for a figure this chunk already links to.
+                if (figureId != null && linkedFigureIds.add(figureId)) {
+                    refRepository.save(new ChunkFigureRefEntity(chunkId, figureId, pageNum));
+                }
+            }
+        }
+    }
+
+    /** Maps each label's normalized number to the id of the first figure carrying it. */
+    private static java.util.Map<String, String> indexByLabelNumber(List<FigureEntity> figures) {
+        java.util.Map<String, String> index = new java.util.HashMap<>();
+        for (FigureEntity figure : figures) {
+            String label = figure.getLabel();
+            if (label == null) continue;
+            Matcher m = LABEL_NUMBER_PATTERN.matcher(label);
+            if (m.find()) {
+                index.putIfAbsent(stripTrailingSeparators(m.group(1)), figure.getId());
+            }
+        }
+        return index;
+    }
+
+    /** Parses the chunk id, returning {@code null} (after a warning) when it is not a UUID. */
+    private static UUID parseChunkId(String chunkIdStr) {
+        if (chunkIdStr != null) {
+            try {
+                return UUID.fromString(chunkIdStr);
+            } catch (IllegalArgumentException ignored) {
+                // Not a UUID: handled by the warning below.
+            }
+        }
+        log.warn("Chunk has non-UUID id: {}", chunkIdStr);
+        return null;
+    }
+
+    /** Drops trailing '.' and '-' that the number patterns pick up from sentence punctuation. */
+    private static String stripTrailingSeparators(String number) {
+        int end = number.length();
+        while (end > 0 && (number.charAt(end - 1) == '.' || number.charAt(end - 1) == '-')) {
+            end--;
+        }
+        return number.substring(0, end);
+    }
+}
@@ -0,0 +1,82 @@
+package com.aiteacher.document;
+
+import jakarta.persistence.*;
+import java.time.Instant;
+import java.util.UUID;
+
+/**
+ * JPA entity mapped to the {@code figure} table: one image extracted from a book.
+ *
+ * <p>The primary key is an application-assigned string. Only {@code caption} and
+ * {@code captionEmbeddingId} have setters; every other field is set through the constructor.
+ */
+@Entity
+@Table(name = "figure")
+public class FigureEntity {
+
+    // Application-assigned identifier, at most 200 characters.
+    @Id
+    @Column(name = "id", length = 200)
+    private String id;
+
+    @Column(name = "book_id", nullable = false)
+    private UUID bookId;
+
+    // Section and chapter ids are stored as plain nullable columns, not as JPA relationships.
+    @Column(name = "section_id", length = 200)
+    private String sectionId;
+
+    @Column(name = "chapter_id", length = 200)
+    private String chapterId;
+
+    @Column(name = "label", length = 100)
+    private String label;
+
+    @Column(name = "caption", columnDefinition = "TEXT")
+    private String caption;
+
+    // Persisted by constant name (EnumType.STRING), so renaming a FigureType constant
+    // changes what is written to and expected from the column.
+    @Enumerated(EnumType.STRING)
+    @Column(name = "figure_type", nullable = false, length = 50)
+    private FigureType figureType;
+
+    @Column(name = "page", nullable = false)
+    private int page;
+
+    // Location of the stored image file.
+    // NOTE(review): presumably the relative path returned by the figure storage service — confirm.
+    @Column(name = "image_path", nullable = false, length = 1000)
+    private String imagePath;
+
+    // Not set by the constructor; stays null until setCaptionEmbeddingId is called.
+    // NOTE(review): presumably the id of the vector-store entry for the caption — confirm.
+    @Column(name = "caption_embedding_id")
+    private UUID captionEmbeddingId;
+
+    @Column(name = "created_at", nullable = false)
+    private Instant createdAt;
+
+    /** No-argument constructor required by JPA. */
+    public FigureEntity() {}
+
+    /**
+     * Creates a figure and stamps {@code createdAt} with the current instant.
+     * {@code captionEmbeddingId} is left {@code null}.
+     *
+     * @param id         application-assigned primary key
+     * @param bookId     id of the owning book
+     * @param sectionId  id of the section the figure belongs to, may be {@code null}
+     * @param chapterId  id of the chapter the figure belongs to, may be {@code null}
+     * @param label      short label of the figure, may be {@code null}
+     * @param caption    caption text, may be {@code null}
+     * @param figureType classification of the figure
+     * @param page       page the figure is on
+     * @param imagePath  location of the stored image file
+     */
+    public FigureEntity(String id, UUID bookId, String sectionId, String chapterId,
+                        String label, String caption, FigureType figureType,
+                        int page, String imagePath) {
+        this.id = id;
+        this.bookId = bookId;
+        this.sectionId = sectionId;
+        this.chapterId = chapterId;
+        this.label = label;
+        this.caption = caption;
+        this.figureType = figureType;
+        this.page = page;
+        this.imagePath = imagePath;
+        this.createdAt = Instant.now();
+    }
+
+    public String getId() { return id; }
+    public UUID getBookId() { return bookId; }
+    public String getSectionId() { return sectionId; }
+    public String getChapterId() { return chapterId; }
+    public String getLabel() { return label; }
+    public String getCaption() { return caption; }
+    public FigureType getFigureType() { return figureType; }
+    public int getPage() { return page; }
+    public String getImagePath() { return imagePath; }
+    public UUID getCaptionEmbeddingId() { return captionEmbeddingId; }
+    public Instant getCreatedAt() { return createdAt; }
+
+    public void setCaptionEmbeddingId(UUID captionEmbeddingId) {
+        this.captionEmbeddingId = captionEmbeddingId;
+    }
+
+    public void setCaption(String caption) {
+        this.caption = caption;
+    }
+}
@@ -0,0 +1,135 @@
+package com.aiteacher.document;
+
+import com.aiteacher.figure.FigureStorageService;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Extracts images from each PDF page using PDFBox.
+ * Images below the configured minimum size are skipped.
+ * Caption is detected by a line starting with "Fig." or "Figure" in the page text.
+ *
+ * <p>Caption, label and figure type are text heuristics: every image on a page receives the
+ * first caption found in that page's text.
+ */
+@Service
+public class FigureExtractionService {
+
+    private static final Logger log = LoggerFactory.getLogger(FigureExtractionService.class);
+
+    // Caption: line starting with "Fig." or "Figure" followed by a number
+    private static final Pattern CAPTION_PATTERN =
+        Pattern.compile("(?m)^((?:Figure|Fig\\.?)\\s*\\d+[\\-.]?\\d*[^\\n]*)", Pattern.CASE_INSENSITIVE);
+
+    // Figure label: "Fig. 12-4", "Fig. 12.4" or "Figure 12-4"
+    private static final Pattern LABEL_PATTERN =
+        Pattern.compile("(?i)(?:Figure|Fig\\.?)\\s*(\\d+[\\-.\\d]*)");
+
+    // Whole-word "mri"/"fmri"/"ct" only: a plain substring test for "ct " also hits
+    // "direct ", "effect " and similar, which would classify nearly every page as a scan.
+    private static final Pattern SCAN_PATTERN =
+        Pattern.compile("\\b(?:f?mri|ct)\\b|magnetic|tomography");
+
+    // Whole words only: a plain substring test for "graph" also hits "photograph" and
+    // "paragraph".
+    private static final Pattern CHART_PATTERN =
+        Pattern.compile("\\b(?:charts?|histograms?|graphs?)\\b");
+
+    private final FigureStorageService storageService;
+    private final FigureRepository figureRepository;
+    private final int minImageSizePx;
+
+    public FigureExtractionService(
+            FigureStorageService storageService,
+            FigureRepository figureRepository,
+            @Value("${app.figure-storage.min-image-size-px:100}") int minImageSizePx) {
+        this.storageService = storageService;
+        this.figureRepository = figureRepository;
+        this.minImageSizePx = minImageSizePx;
+    }
+
+    /**
+     * Extracts all qualifying images from the PDF for the given book.
+     * Returns persisted FigureEntity list (without vision descriptions — set later).
+     *
+     * <p>Only the start page of each section is inspected. A page that cannot be read is
+     * logged and skipped; a PDF that cannot be opened is logged and yields the figures
+     * collected so far (normally none). Failures raised by the storage service or the
+     * repository are not caught here.
+     *
+     * @param bookId    id of the book the PDF belongs to
+     * @param chapterId chapter id recorded on every extracted figure
+     * @param sections  sections whose start pages are scanned for images
+     * @param pdfPath   location of the PDF file
+     * @return the saved figures, in extraction order
+     */
+    public List<FigureEntity> extract(UUID bookId, String chapterId,
+                                      List<SectionEntity> sections, Path pdfPath) {
+        List<FigureEntity> figures = new ArrayList<>();
+        int figureCounter = 0;
+
+        try (PDDocument doc = Loader.loadPDF(pdfPath.toFile())) {
+            for (SectionEntity section : sections) {
+                int pageIndex = section.getPageStart() - 1; // 0-based
+                if (pageIndex < 0 || pageIndex >= doc.getNumberOfPages()) continue;
+
+                PDPage page = doc.getPage(pageIndex);
+                var resources = page.getResources();
+                if (resources == null) continue; // page declares no resources, hence no images
+
+                // Caption and type depend only on the page text, so compute them once per page.
+                String pageText = section.getFullText();
+                String caption = detectCaption(pageText);
+                FigureType type = classifyType(caption, pageText);
+
+                try {
+                    for (COSName name : resources.getXObjectNames()) {
+                        PDXObject xObject = resources.getXObject(name);
+                        if (!(xObject instanceof PDImageXObject image)) continue;
+
+                        BufferedImage bufferedImage = image.getImage();
+                        if (bufferedImage.getWidth() < minImageSizePx
+                                || bufferedImage.getHeight() < minImageSizePx) {
+                            continue; // skip decorative images
+                        }
+
+                        figureCounter++;
+                        String figureId = bookId + "-fig-" + pageIndex + "-" + figureCounter;
+                        String label = detectLabel(caption, figureCounter);
+
+                        String imagePath = storageService.save(bookId, figureId, bufferedImage);
+
+                        FigureEntity figure = new FigureEntity(
+                            figureId, bookId, section.getId(), chapterId,
+                            label, caption, type, section.getPageStart(), imagePath
+                        );
+                        figures.add(figureRepository.save(figure));
+                    }
+                } catch (IOException ex) {
+                    log.warn("Failed to extract images from page {} of book {}: {}",
+                        section.getPageStart(), bookId, ex.getMessage());
+                }
+            }
+        } catch (IOException ex) {
+            log.error("Could not open PDF for image extraction, book {}", bookId, ex);
+        }
+
+        log.info("Extracted {} figures for book {}", figures.size(), bookId);
+        return figures;
+    }
+
+    /** Returns the first caption line in the page text, or {@code null} when there is none. */
+    private String detectCaption(String pageText) {
+        if (pageText == null) return null;
+        Matcher m = CAPTION_PATTERN.matcher(pageText);
+        return m.find() ? m.group(1).trim() : null;
+    }
+
+    /**
+     * Builds the "Fig. N" label from the caption's number, falling back to the running
+     * counter. Trailing '.' or '-' picked up from caption punctuation ("Fig. 12. Sagittal…")
+     * is removed so the label ends with the bare figure number.
+     */
+    private String detectLabel(String caption, int counter) {
+        if (caption != null) {
+            Matcher m = LABEL_PATTERN.matcher(caption);
+            if (m.find()) {
+                String number = m.group(1);
+                int end = number.length();
+                while (end > 0 && (number.charAt(end - 1) == '.' || number.charAt(end - 1) == '-')) {
+                    end--;
+                }
+                return "Fig. " + number.substring(0, end);
+            }
+        }
+        return "Fig. " + counter;
+    }
+
+    /**
+     * Keyword-based classification over the caption plus the page text; the first matching
+     * rule wins and {@link FigureType#ANATOMICAL_DIAGRAM} is the default.
+     */
+    private FigureType classifyType(String caption, String pageText) {
+        // Locale.ROOT keeps the keyword matching independent of the JVM default locale.
+        String captionLower = caption != null ? caption.toLowerCase(java.util.Locale.ROOT) : "";
+        String combined = captionLower + " "
+            + (pageText != null ? pageText.toLowerCase(java.util.Locale.ROOT) : "");
+
+        if (SCAN_PATTERN.matcher(combined).find()) return FigureType.MRI_CT_SCAN;
+        if (combined.contains("intraoperative") || combined.contains("intra-op")) return FigureType.INTRAOPERATIVE_IMAGE;
+        if (captionLower.startsWith("table")) return FigureType.TABLE;
+        if (CHART_PATTERN.matcher(combined).find()) return FigureType.CHART;
+        if (combined.contains("photo")) return FigureType.SURGICAL_PHOTOGRAPH; // also covers "photograph"
+        return FigureType.ANATOMICAL_DIAGRAM;
+    }
+}
@@ -0,0 +1,11 @@
+package com.aiteacher.document;
+
+import org.springframework.data.jpa.repository.JpaRepository;
+
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Spring Data repository for {@link FigureEntity} rows, keyed by the figure's string id.
+ */
+public interface FigureRepository extends JpaRepository<FigureEntity, String> {
+
+    /**
+     * Returns all figures of the given book.
+     *
+     * @param bookId id of the book
+     */
+    List<FigureEntity> findAllByBookId(UUID bookId);
+
+    /**
+     * Deletes every figure row of the given book (image files are not touched).
+     *
+     * <p>Declared query methods do not get a transaction by default and a derived delete
+     * needs one, so it is declared here; a transaction opened by the caller is joined.
+     *
+     * @param bookId id of the book whose figures are removed
+     */
+    @org.springframework.transaction.annotation.Transactional
+    void deleteAllByBookId(UUID bookId);
+}
@@ -0,0 +1,10 @@
+package com.aiteacher.document;
+
+/**
+ * Classification of an extracted figure.
+ *
+ * <p>{@code FigureEntity} persists this enum by constant name
+ * ({@code @Enumerated(EnumType.STRING)}), so the constant names are part of the stored data.
+ */
+public enum FigureType {
+    ANATOMICAL_DIAGRAM,
+    SURGICAL_PHOTOGRAPH,
+    MRI_CT_SCAN,
+    TABLE,
+    CHART,
+    INTRAOPERATIVE_IMAGE
+}
@@ -0,0 +1,71 @@
+package com.aiteacher.document;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
+import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
+import org.springframework.core.io.FileSystemResource;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Turns a PDF into persisted structure records: a single {@link ChapterEntity} for the whole
+ * book and one {@link SectionEntity} for every page that contains text.
+ */
+@Service
+public class PdfStructureParser {
+
+    private static final Logger log = LoggerFactory.getLogger(PdfStructureParser.class);
+
+    private final ChapterRepository chapterRepository;
+    private final SectionRepository sectionRepository;
+
+    public PdfStructureParser(ChapterRepository chapterRepository,
+                               SectionRepository sectionRepository) {
+        this.chapterRepository = chapterRepository;
+        this.sectionRepository = sectionRepository;
+    }
+
+    /**
+     * Reads the PDF page by page and stores the chapter and its page sections.
+     *
+     * <p>The chapter id is {@code <bookId>-ch1}; section ids are {@code <bookId>-p<page>}.
+     * Pages whose text is {@code null} or blank produce no section.
+     *
+     * <p>NOTE(review): the page number is derived from the position of the document in the
+     * reader's result list — confirm that the reader returns exactly one document per
+     * physical page, including empty ones.
+     *
+     * @param bookId    id of the book being parsed
+     * @param bookTitle title stored on the chapter
+     * @param pdfPath   location of the PDF file
+     * @return the saved sections in page order
+     */
+    @Transactional
+    public List<SectionEntity> parse(UUID bookId, String bookTitle, Path pdfPath) {
+        log.info("Parsing PDF structure for book {}", bookId);
+
+        // The whole book is modelled as chapter number 1 starting on page 1.
+        final String chapterId = bookId + "-ch1";
+        chapterRepository.save(new ChapterEntity(chapterId, bookId, 1, bookTitle, 1));
+
+        // Ask the reader for one document per page.
+        PdfDocumentReaderConfig perPageConfig =
+            PdfDocumentReaderConfig.builder().withPagesPerDocument(1).build();
+        List<org.springframework.ai.document.Document> pages =
+            new PagePdfDocumentReader(new FileSystemResource(pdfPath.toFile()), perPageConfig).get();
+
+        List<SectionEntity> sections = new ArrayList<>();
+        int pageNum = 0;
+        for (org.springframework.ai.document.Document page : pages) {
+            pageNum++;
+            String text = page.getText();
+            boolean hasText = text != null && !text.isBlank();
+            if (!hasText) {
+                continue;
+            }
+
+            SectionEntity section = new SectionEntity(
+                bookId + "-p" + pageNum, chapterId, bookId,
+                String.valueOf(pageNum),
+                "Page " + pageNum,
+                pageNum, pageNum,
+                text
+            );
+            sections.add(sectionRepository.save(section));
+        }
+
+        log.info("Parsed {} sections for book {}", sections.size(), bookId);
+        return sections;
+    }
+}
@@ -0,0 +1,63 @@
+package com.aiteacher.document;
+
+import jakarta.persistence.*;
+import java.time.Instant;
+import java.util.UUID;
+
+/**
+ * JPA entity mapped to the {@code section} table: a span of pages of a book together with
+ * its full text.
+ *
+ * <p>The primary key is an application-assigned string. The class exposes getters only;
+ * state is set through the constructor.
+ */
+@Entity
+@Table(name = "section")
+public class SectionEntity {
+
+    // Application-assigned identifier, at most 200 characters.
+    @Id
+    @Column(name = "id", length = 200)
+    private String id;
+
+    // Chapter and book ids are stored as plain columns, not as JPA relationships.
+    @Column(name = "chapter_id", nullable = false, length = 200)
+    private String chapterId;
+
+    @Column(name = "book_id", nullable = false)
+    private UUID bookId;
+
+    // Section number kept as text (nullable).
+    @Column(name = "number", length = 50)
+    private String number;
+
+    @Column(name = "title", length = 500)
+    private String title;
+
+    @Column(name = "page_start", nullable = false)
+    private int pageStart;
+
+    @Column(name = "page_end", nullable = false)
+    private int pageEnd;
+
+    @Column(name = "full_text", nullable = false, columnDefinition = "TEXT")
+    private String fullText;
+
+    @Column(name = "created_at", nullable = false)
+    private Instant createdAt;
+
+    /** No-argument constructor required by JPA. */
+    public SectionEntity() {}
+
+    /**
+     * Creates a section and stamps {@code createdAt} with the current instant.
+     *
+     * @param id        application-assigned primary key
+     * @param chapterId id of the owning chapter
+     * @param bookId    id of the owning book
+     * @param number    section number as text, may be {@code null}
+     * @param title     section title, may be {@code null}
+     * @param pageStart first page of the section
+     * @param pageEnd   last page of the section
+     * @param fullText  text of the section; the column is declared non-null
+     */
+    public SectionEntity(String id, String chapterId, UUID bookId, String number,
+                         String title, int pageStart, int pageEnd, String fullText) {
+        this.id = id;
+        this.chapterId = chapterId;
+        this.bookId = bookId;
+        this.number = number;
+        this.title = title;
+        this.pageStart = pageStart;
+        this.pageEnd = pageEnd;
+        this.fullText = fullText;
+        this.createdAt = Instant.now();
+    }
+
+    public String getId() { return id; }
+    public String getChapterId() { return chapterId; }
+    public UUID getBookId() { return bookId; }
+    public String getNumber() { return number; }
+    public String getTitle() { return title; }
+    public int getPageStart() { return pageStart; }
+    public int getPageEnd() { return pageEnd; }
+    public String getFullText() { return fullText; }
+    public Instant getCreatedAt() { return createdAt; }
+}
@@ -0,0 +1,11 @@
+package com.aiteacher.document;
+
+import org.springframework.data.jpa.repository.JpaRepository;
+
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Spring Data repository for {@link SectionEntity} rows, keyed by the section's string id.
+ */
+public interface SectionRepository extends JpaRepository<SectionEntity, String> {
+
+    /**
+     * Returns all sections of the given book.
+     *
+     * @param bookId id of the book
+     */
+    List<SectionEntity> findAllByBookId(UUID bookId);
+
+    /**
+     * Deletes every section of the given book.
+     *
+     * <p>Declared query methods do not get a transaction by default and a derived delete
+     * needs one, so it is declared here; a transaction opened by the caller is joined.
+     *
+     * @param bookId id of the book whose sections are removed
+     */
+    @org.springframework.transaction.annotation.Transactional
+    void deleteAllByBookId(UUID bookId);
+}
@@ -0,0 +1,65 @@
+package com.aiteacher.document;
+
+import org.springframework.ai.document.Document;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * Cuts the full text of a section into fixed-size character windows that overlap, ready to
+ * be embedded into the vector store.
+ * Window size is 1800 characters; consecutive windows share 200 characters.
+ */
+@Service
+public class TextChunkingService {
+
+    private static final int TARGET_CHARS = 1800;
+    private static final int OVERLAP_CHARS = 200;
+
+    /**
+     * Produces one {@link Document} per text window of the section.
+     *
+     * <p>Every document gets a fresh random UUID as id, and that same id is repeated in the
+     * metadata under {@code chunk_id}.
+     *
+     * @param section   section whose full text is split
+     * @param bookTitle title copied into each chunk's metadata
+     * @return the chunks in text order; empty when the section text is {@code null} or blank
+     */
+    public List<Document> chunk(SectionEntity section, String bookTitle) {
+        String fullText = section.getFullText();
+        if (fullText == null || fullText.isBlank()) {
+            return List.of();
+        }
+
+        List<String> pieces = split(fullText);
+        int total = pieces.size();
+        List<Document> result = new ArrayList<>();
+
+        int position = 0;
+        for (String piece : pieces) {
+            String id = UUID.randomUUID().toString();
+            result.add(new Document(id, piece, buildMetadata(section, bookTitle, position, total, id)));
+            position++;
+        }
+        return result;
+    }
+
+    /** Slides a TARGET_CHARS window over the text, advancing by window size minus overlap. */
+    private List<String> split(String text) {
+        final int length = text.length();
+        final int stride = TARGET_CHARS - OVERLAP_CHARS;
+
+        List<String> pieces = new ArrayList<>();
+        for (int from = 0; from < length; from += stride) {
+            int to = Math.min(from + TARGET_CHARS, length);
+            pieces.add(text.substring(from, to));
+            if (to == length) {
+                break; // the window reached the end of the text
+            }
+        }
+        return pieces;
+    }
+
+    /** Assembles the metadata map stored alongside a chunk. */
+    private Map<String, Object> buildMetadata(SectionEntity section, String bookTitle,
+                                               int index, int total, String chunkId) {
+        String sectionTitle = section.getTitle();
+
+        Map<String, Object> metadata = new HashMap<>();
+        metadata.put("type", "TEXT");
+        metadata.put("book_id", section.getBookId().toString());
+        metadata.put("book_title", bookTitle);
+        metadata.put("chapter_id", section.getChapterId());
+        metadata.put("section_id", section.getId());
+        metadata.put("section_title", sectionTitle == null ? "" : sectionTitle);
+        metadata.put("page_start", section.getPageStart());
+        metadata.put("page_end", section.getPageEnd());
+        metadata.put("chunk_index", index);
+        metadata.put("total_chunks", total);
+        metadata.put("chunk_id", chunkId);
+        return metadata;
+    }
+}
@@ -0,0 +1,49 @@
+package com.aiteacher.document;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.core.io.FileSystemResource;
+import org.springframework.stereotype.Service;
+import org.springframework.util.MimeTypeUtils;
+
+import java.nio.file.Path;
+
+/**
+ * Generates a clinical text description for an extracted figure image
+ * using the OpenAI vision model via Spring AI ChatClient.
+ */
+@Service
+public class VisionDescriptionService {
+
+    private static final Logger log = LoggerFactory.getLogger(VisionDescriptionService.class);
+
+    private static final String PROMPT =
+        "You are a neurosurgery educator. Provide a brief 2-3 sentence clinical description of " +
+        "this image. Focus on anatomical structures, surgical landmarks, labels, and clinical " +
+        "significance. If text or labels are visible, include them verbatim.";
+
+    private final ChatClient chatClient;
+
+    public VisionDescriptionService(ChatClient chatClient) {
+        this.chatClient = chatClient;
+    }
+
+    /**
+     * Returns a description string. Falls back to the provided caption if vision fails.
+     *
+     * <p>"Fails" covers both an exception from the model call and an answer that is
+     * {@code null} or blank, so the result is never {@code null}. The image is sent with
+     * the PNG media type.
+     *
+     * @param imagePath       location of the image file to describe
+     * @param captionFallback text used when no description is available; when it is
+     *                        {@code null} the literal {@code "Figure"} is returned instead
+     * @return the model's description, or the fallback text
+     */
+    public String describe(Path imagePath, String captionFallback) {
+        try {
+            String description = chatClient.prompt()
+                .user(u -> u
+                    .text(PROMPT)
+                    .media(MimeTypeUtils.IMAGE_PNG, new FileSystemResource(imagePath.toFile())))
+                .call()
+                .content();
+            if (description != null && !description.isBlank()) {
+                return description;
+            }
+            // An empty answer is as useless as a failed call; treat it the same way.
+            log.warn("Vision description was empty for {} — using caption as fallback",
+                imagePath.getFileName());
+        } catch (Exception ex) {
+            log.warn("Vision description failed for {}: {} — using caption as fallback",
+                imagePath.getFileName(), ex.getMessage());
+        }
+        return captionFallback != null ? captionFallback : "Figure";
+    }
+}
@@ -0,0 +1,24 @@
+package com.aiteacher.figure;
+
+import java.awt.image.BufferedImage;
+import java.nio.file.Path;
+import java.util.UUID;
+
+/**
+ * Abstraction over the store that holds extracted figure images.
+ *
+ * <p>Images are addressed by a relative path that is returned by {@link #save} and turned
+ * back into a filesystem path by {@link #resolve}.
+ */
+public interface FigureStorageService {
+
+    /**
+     * Saves an extracted image to the figure store and returns the relative path
+     * (relative to the configured base-path) stored in the database.
+     *
+     * @param bookId   id of the book the figure belongs to
+     * @param figureId id of the figure
+     * @param image    image content to store
+     * @return the path of the stored image, relative to the configured base path
+     */
+    String save(UUID bookId, String figureId, BufferedImage image);
+
+    /**
+     * Resolves a stored relative path to an absolute filesystem path.
+     *
+     * @param relativePath a path previously returned by {@link #save}
+     * @return the corresponding filesystem path
+     */
+    Path resolve(String relativePath);
+
+    /**
+     * Deletes all figure files for the given book.
+     *
+     * @param bookId id of the book whose figure files are removed
+     */
+    void deleteAll(UUID bookId);
+}
@@ -0,0 +1,59 @@
+package com.aiteacher.figure;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.UUID;
+
+/**
+ * {@link FigureStorageService} backed by the local filesystem.
+ *
+ * <p>Images are written as PNG files to {@code <base-path>/figures/<bookId>/<figureId>.png},
+ * where {@code base-path} comes from {@code app.figure-storage.base-path}
+ * (default {@code ./uploads}).
+ */
+@Service
+public class LocalFigureStorageService implements FigureStorageService {
+
+    private static final Logger log = LoggerFactory.getLogger(LocalFigureStorageService.class);
+
+    /** Absolute, normalized root of the store. */
+    private final Path basePath;
+
+    public LocalFigureStorageService(@Value("${app.figure-storage.base-path:./uploads}") String basePath) {
+        this.basePath = Paths.get(basePath).toAbsolutePath().normalize();
+    }
+
+    /**
+     * Writes the image as a PNG file, creating the book directory when needed.
+     *
+     * @return the path of the written file relative to the base path,
+     *         {@code figures/<bookId>/<figureId>.png}
+     * @throws RuntimeException when the directory cannot be created or the image cannot be
+     *                          written, including the case where no PNG writer accepts it
+     */
+    @Override
+    public String save(UUID bookId, String figureId, BufferedImage image) {
+        try {
+            Path dir = basePath.resolve("figures").resolve(bookId.toString());
+            Files.createDirectories(dir);
+            String filename = figureId + ".png";
+            Path file = dir.resolve(filename);
+            // ImageIO.write reports "no suitable writer" through its return value, not an
+            // exception; ignoring it would record a path to a file that was never written.
+            if (!ImageIO.write(image, "PNG", file.toFile())) {
+                throw new IOException("No PNG writer available for image of type " + image.getType());
+            }
+            // Return relative path for storage in DB
+            return "figures/" + bookId + "/" + filename;
+        } catch (IOException ex) {
+            throw new RuntimeException("Failed to save figure " + figureId, ex);
+        }
+    }
+
+    /**
+     * Resolves a stored relative path against the base path.
+     *
+     * @throws IllegalArgumentException when the path would leave the base directory
+     *                                  (for example via {@code ..} or an absolute path)
+     */
+    @Override
+    public Path resolve(String relativePath) {
+        Path resolved = basePath.resolve(relativePath).normalize();
+        if (!resolved.startsWith(basePath)) {
+            throw new IllegalArgumentException("Path escapes figure store: " + relativePath);
+        }
+        return resolved;
+    }
+
+    /**
+     * Removes the book's figure directory with everything in it. This is best effort:
+     * entries that cannot be deleted are logged and the remaining ones are still attempted.
+     */
+    @Override
+    public void deleteAll(UUID bookId) {
+        Path dir = basePath.resolve("figures").resolve(bookId.toString());
+        if (!Files.exists(dir)) return;
+        try (var walk = Files.walk(dir)) {
+            // Reverse order yields children before their parent directory.
+            var entries = walk.sorted(java.util.Comparator.reverseOrder()).toList();
+            for (Path entry : entries) {
+                try {
+                    Files.deleteIfExists(entry);
+                } catch (IOException ex) {
+                    log.warn("Could not delete {} for book {}: {}", entry, bookId, ex.getMessage());
+                }
+            }
+        } catch (IOException | java.io.UncheckedIOException ex) {
+            log.warn("Could not fully delete figures for book {}: {}", bookId, ex.getMessage());
+        }
+    }
+}
@@ -0,0 +1,111 @@
+package com.aiteacher.retrieval;
+
+import com.aiteacher.document.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.vectorstore.SearchRequest;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
+import org.springframework.stereotype.Service;
+
+import java.util.*;
+
+/**
+ * Dual-modality retriever: searches text chunks and figure captions independently,
+ * then expands text hits to their parent sections and merges linked figures.
+ *
+ * <p>Results keep the ranking of the similarity search: sections follow the order of the
+ * text hits, and figures linked to text hits come before figures found by caption search.
+ */
+@Service
+public class NeurosurgeryRetriever {
+
+    private static final Logger log = LoggerFactory.getLogger(NeurosurgeryRetriever.class);
+
+    private static final int TEXT_TOP_K = 5;
+    private static final int FIGURE_TOP_K = 3;
+
+    private final VectorStore vectorStore;
+    private final SectionRepository sectionRepository;
+    private final FigureRepository figureRepository;
+    private final ChunkFigureRefRepository chunkFigureRefRepository;
+
+    public NeurosurgeryRetriever(VectorStore vectorStore,
+                                  SectionRepository sectionRepository,
+                                  FigureRepository figureRepository,
+                                  ChunkFigureRefRepository chunkFigureRefRepository) {
+        this.vectorStore = vectorStore;
+        this.sectionRepository = sectionRepository;
+        this.figureRepository = figureRepository;
+        this.chunkFigureRefRepository = chunkFigureRefRepository;
+    }
+
+    /**
+     * Retrieves the sections and figures of one book that are relevant to the query.
+     *
+     * @param query  free-text query
+     * @param bookId id of the book to search in; must not be {@code null}
+     * @return parent sections of the best text chunks and the merged, de-duplicated figures
+     * @throws NullPointerException when {@code bookId} is {@code null}
+     */
+    public RetrievalResult retrieve(String query, UUID bookId) {
+        Objects.requireNonNull(bookId, "bookId");
+        String bookIdValue = bookId.toString();
+
+        // 1. Text chunk search
+        List<Document> textHits = search(query, "TEXT", bookIdValue, TEXT_TOP_K);
+
+        // 2. Figure caption search (independent topK)
+        List<Document> figureHits = search(query, "FIGURE", bookIdValue, FIGURE_TOP_K);
+
+        // 3. Expand text chunks to parent sections from Postgres
+        List<String> sectionIds = metadataValues(textHits, "section_id");
+        List<SectionEntity> sections = sectionIds.isEmpty()
+            ? List.of()
+            : inRequestedOrder(sectionIds, sectionRepository.findAllById(sectionIds),
+                SectionEntity::getId);
+
+        // 4. + 5. Figure ids: explicitly linked ones first, then semantic caption hits.
+        // The insertion-ordered set de-duplicates, so linked figures take precedence.
+        Set<String> figureIds = new LinkedHashSet<>(linkedFigureIds(textHits));
+        figureIds.addAll(metadataValues(figureHits, "figure_id"));
+
+        // 6. One lookup for all figures, restored to the ranking established above.
+        List<FigureEntity> figures = figureIds.isEmpty()
+            ? List.of()
+            : inRequestedOrder(figureIds, figureRepository.findAllById(figureIds),
+                FigureEntity::getId);
+
+        log.debug("Retrieved {} sections, {} figures for query", sections.size(), figures.size());
+        return new RetrievalResult(sections, figures);
+    }
+
+    /** Runs a similarity search restricted to one document type of one book. */
+    private List<Document> search(String query, String type, String bookId, int topK) {
+        FilterExpressionBuilder b = new FilterExpressionBuilder();
+        List<Document> hits = vectorStore.similaritySearch(
+            SearchRequest.builder()
+                .query(query)
+                .topK(topK)
+                .filterExpression(b.and(
+                    b.eq("type", type),
+                    b.eq("book_id", bookId)
+                ).build())
+                .build()
+        );
+        return hits != null ? hits : List.of();
+    }
+
+    /** Collects the distinct non-null values of a metadata key, in hit order. */
+    private static List<String> metadataValues(List<Document> hits, String key) {
+        Set<String> values = new LinkedHashSet<>();
+        for (Document hit : hits) {
+            Object value = hit.getMetadata().get(key);
+            if (value != null) {
+                values.add(value.toString());
+            }
+        }
+        return new ArrayList<>(values);
+    }
+
+    /** Ids of the figures linked to the given text hits, ordered by the rank of the hit. */
+    private List<String> linkedFigureIds(List<Document> textHits) {
+        List<UUID> chunkIds = new ArrayList<>();
+        for (Document hit : textHits) {
+            try {
+                chunkIds.add(UUID.fromString(hit.getId()));
+            } catch (RuntimeException ignored) {
+                // Hits whose id is not a UUID cannot have figure links; skip them.
+            }
+        }
+        if (chunkIds.isEmpty()) {
+            return List.of();
+        }
+
+        List<ChunkFigureRefEntity> refs = chunkFigureRefRepository.findByChunkIdIn(chunkIds);
+        Set<String> ordered = new LinkedHashSet<>();
+        for (UUID chunkId : chunkIds) {
+            for (ChunkFigureRefEntity ref : refs) {
+                if (chunkId.equals(ref.getChunkId())) {
+                    ordered.add(ref.getFigureId());
+                }
+            }
+        }
+        return new ArrayList<>(ordered);
+    }
+
+    /**
+     * Re-orders repository results to follow {@code ids}. {@code findAllById} gives no
+     * ordering guarantee, which would otherwise discard the similarity ranking. Ids without
+     * a matching entity are dropped.
+     */
+    private static <T> List<T> inRequestedOrder(Collection<String> ids, Iterable<T> found,
+                                                java.util.function.Function<T, String> idOf) {
+        Map<String, T> byId = new HashMap<>();
+        for (T entity : found) {
+            byId.putIfAbsent(idOf.apply(entity), entity);
+        }
+        List<T> ordered = new ArrayList<>();
+        for (String id : ids) {
+            T entity = byId.get(id);
+            if (entity != null) {
+                ordered.add(entity);
+            }
+        }
+        return ordered;
+    }
+}
@@ -0,0 +1,11 @@
+package com.aiteacher.retrieval;
+
+import com.aiteacher.document.FigureEntity;
+import com.aiteacher.document.SectionEntity;
+
+import java.util.List;
+
+/**
+ * Result of a retrieval: the sections and the figures selected for a query.
+ * The lists are stored as passed in; no defensive copy is made.
+ *
+ * @param parentSections sections that contain the retrieved text chunks
+ * @param figures        figures associated with the query
+ */
+public record RetrievalResult(
+    List<SectionEntity> parentSections,
+    List<FigureEntity> figures
+) {}