From 5acfdd33c1b55b58ac960c7e3c8a75effb5f0f54 Mon Sep 17 00:00:00 2001 From: Adrien Date: Sat, 4 Apr 2026 12:56:56 +0200 Subject: [PATCH] first implementation - image/drawing integration --- .gitignore | 12 + CLAUDE.md | 5 +- README.md | 41 ++- backend/pom.xml | 9 +- .../com/aiteacher/book/BookController.java | 36 ++- .../aiteacher/book/BookEmbeddingService.java | 211 +++++++++--- .../java/com/aiteacher/book/BookService.java | 49 ++- .../com/aiteacher/book/FigureResponse.java | 12 + .../java/com/aiteacher/chat/ChatService.java | 173 ++++++---- .../aiteacher/config/FigureStorageConfig.java | 25 ++ .../com/aiteacher/document/ChapterEntity.java | 47 +++ .../aiteacher/document/ChapterRepository.java | 9 + .../document/ChunkFigureRefEntity.java | 58 ++++ .../document/ChunkFigureRefRepository.java | 18 ++ .../document/ChunkFigureRefService.java | 62 ++++ .../com/aiteacher/document/FigureEntity.java | 82 +++++ .../document/FigureExtractionService.java | 135 ++++++++ .../aiteacher/document/FigureRepository.java | 11 + .../com/aiteacher/document/FigureType.java | 10 + .../document/PdfStructureParser.java | 71 ++++ .../com/aiteacher/document/SectionEntity.java | 63 ++++ .../aiteacher/document/SectionRepository.java | 11 + .../document/TextChunkingService.java | 65 ++++ .../document/VisionDescriptionService.java | 49 +++ .../figure/FigureStorageService.java | 24 ++ .../figure/LocalFigureStorageService.java | 59 ++++ .../retrieval/NeurosurgeryRetriever.java | 111 +++++++ .../aiteacher/retrieval/RetrievalResult.java | 11 + backend/src/main/resources/application.yaml | 10 + .../db/migration/V4__document_hierarchy.sql | 28 ++ .../db/migration/V5__figures_and_refs.sql | 29 ++ frontend/src/components/ChatMessage.vue | 146 +++++++-- frontend/src/stores/chatStore.ts | 16 +- .../checklists/embedding-retrieval.md | 73 +++++ .../checklists/requirements.md | 34 ++ .../contracts/api.md | 172 ++++++++++ specs/002-image-aware-embedding/data-model.md | 305 ++++++++++++++++++ 
specs/002-image-aware-embedding/plan.md | 105 ++++++ specs/002-image-aware-embedding/quickstart.md | 86 +++++ specs/002-image-aware-embedding/research.md | 188 +++++++++++ specs/002-image-aware-embedding/spec.md | 176 ++++++++++ specs/002-image-aware-embedding/tasks.md | 168 ++++++++++ 42 files changed, 2854 insertions(+), 151 deletions(-) create mode 100644 backend/src/main/java/com/aiteacher/book/FigureResponse.java create mode 100644 backend/src/main/java/com/aiteacher/config/FigureStorageConfig.java create mode 100644 backend/src/main/java/com/aiteacher/document/ChapterEntity.java create mode 100644 backend/src/main/java/com/aiteacher/document/ChapterRepository.java create mode 100644 backend/src/main/java/com/aiteacher/document/ChunkFigureRefEntity.java create mode 100644 backend/src/main/java/com/aiteacher/document/ChunkFigureRefRepository.java create mode 100644 backend/src/main/java/com/aiteacher/document/ChunkFigureRefService.java create mode 100644 backend/src/main/java/com/aiteacher/document/FigureEntity.java create mode 100644 backend/src/main/java/com/aiteacher/document/FigureExtractionService.java create mode 100644 backend/src/main/java/com/aiteacher/document/FigureRepository.java create mode 100644 backend/src/main/java/com/aiteacher/document/FigureType.java create mode 100644 backend/src/main/java/com/aiteacher/document/PdfStructureParser.java create mode 100644 backend/src/main/java/com/aiteacher/document/SectionEntity.java create mode 100644 backend/src/main/java/com/aiteacher/document/SectionRepository.java create mode 100644 backend/src/main/java/com/aiteacher/document/TextChunkingService.java create mode 100644 backend/src/main/java/com/aiteacher/document/VisionDescriptionService.java create mode 100644 backend/src/main/java/com/aiteacher/figure/FigureStorageService.java create mode 100644 backend/src/main/java/com/aiteacher/figure/LocalFigureStorageService.java create mode 100644 
backend/src/main/java/com/aiteacher/retrieval/NeurosurgeryRetriever.java create mode 100644 backend/src/main/java/com/aiteacher/retrieval/RetrievalResult.java create mode 100644 backend/src/main/resources/db/migration/V4__document_hierarchy.sql create mode 100644 backend/src/main/resources/db/migration/V5__figures_and_refs.sql create mode 100644 specs/002-image-aware-embedding/checklists/embedding-retrieval.md create mode 100644 specs/002-image-aware-embedding/checklists/requirements.md create mode 100644 specs/002-image-aware-embedding/contracts/api.md create mode 100644 specs/002-image-aware-embedding/data-model.md create mode 100644 specs/002-image-aware-embedding/plan.md create mode 100644 specs/002-image-aware-embedding/quickstart.md create mode 100644 specs/002-image-aware-embedding/research.md create mode 100644 specs/002-image-aware-embedding/spec.md create mode 100644 specs/002-image-aware-embedding/tasks.md diff --git a/.gitignore b/.gitignore index c6ead03..5424719 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,15 @@ +# Runtime uploads (extracted figures) +uploads/ + +# Java build +target/ +*.class +*.jar + +# Node +node_modules/ +dist/ + # OS .DS_Store Thumbs.db diff --git a/CLAUDE.md b/CLAUDE.md index 4111402..d964a04 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,8 +1,10 @@ # ai-teacher Development Guidelines -Auto-generated from all feature plans. Last updated: 2026-03-31 +Auto-generated from all feature plans. 
Last updated: 2026-04-03 ## Active Technologies +- Java 25 (backend), TypeScript / Node 20 (frontend) + Spring Boot 4.0.5, Spring AI 2.0.0-M4, OpenAI API (embeddings + chat), PDFBox (via Spring AI PDF reader dependency) (002-image-aware-embedding) +- PostgreSQL (JPA + Flyway), pgvector (Spring AI `VectorStore`), local file system (extracted images — `/uploads/figures/`) (002-image-aware-embedding) - Java 21 (backend), TypeScript / Node 20 (frontend) (001-neuro-rag-learning) @@ -22,6 +24,7 @@ npm test && npm run lint Java 21 (backend), TypeScript / Node 20 (frontend): Follow standard conventions ## Recent Changes +- 002-image-aware-embedding: Added Java 25 (backend), TypeScript / Node 20 (frontend) + Spring Boot 4.0.5, Spring AI 2.0.0-M4, OpenAI API (embeddings + chat), PDFBox (via Spring AI PDF reader dependency) - 001-neuro-rag-learning: Added Java 21 (backend), TypeScript / Node 20 (frontend) diff --git a/README.md b/README.md index 0633770..e1cade0 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,45 @@ graph TD User["Neurosurgeon (Browser)"] FE["Frontend\nVue.js 3 / Vite\n:5173"] BE["Backend\nSpring Boot 4 / Spring AI\n:8080"] - DB["PostgreSQL + pgvector\n(provided)"] - LLM["LLM Provider\n(OpenAI / configurable)"] + DB["PostgreSQL + pgvector\n(source of truth)"] + FS["File Store\nuploads/ (local disk)\nExtracted figure PNGs"] + LLM["LLM Provider\n(OpenAI)\nEmbeddings + Chat + Vision"] User -->|HTTP| FE FE -->|REST /api/v1/...| BE - BE -->|JDBC / pgvector| DB - BE -->|Embedding + Chat API| LLM + BE -->|"JDBC — books, chapters,\nsections, figures, refs"| DB + BE -->|"pgvector — text chunks\n+ figure caption vectors"| DB + BE -->|"PNG read/write\n(figure extraction)"| FS + FE -->|"GET /api/v1/figures/**\n(static file serving)"| BE + BE -->|"Embedding + Chat\n+ Vision (image description)"| LLM + + subgraph "Embedding Pipeline (per PDF upload)" + EP1["Parse pages → SectionEntity"] + EP2["Extract images → FigureEntity"] + EP3["Vision describe → embed caption"] + 
EP4["Chunk text → embed chunks"] + EP5["Link chunks ↔ figures"] + EP1 --> EP2 + EP1 --> EP4 + EP2 --> EP3 + EP4 --> EP5 + EP3 --> EP5 + end + + subgraph "Retrieval Pipeline (per chat query)" + RP1["Text chunk search (topK=5)"] + RP2["Figure caption search (topK=3)"] + RP3["Expand chunks → full section text"] + RP4["Fetch linked figures (chunk_figure_ref)"] + RP5["Merge + deduplicate figures"] + RP6["Build LLM prompt + call"] + RP1 --> RP3 + RP1 --> RP4 + RP2 --> RP5 + RP4 --> RP5 + RP3 --> RP6 + RP5 --> RP6 + end ``` ## Stack @@ -56,3 +88,4 @@ npm run dev | `DB_URL` | Yes | JDBC URL, e.g. `jdbc:postgresql://localhost:5432/aiteacher` | | `DB_USERNAME` | Yes | Database username | | `DB_PASSWORD` | Yes | Database password | +| `FIGURE_STORAGE_PATH` | No | Base path for uploaded PDFs and extracted figures (default: `./uploads`) | diff --git a/backend/pom.xml b/backend/pom.xml index 864e176..50b8e2e 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -95,12 +95,19 @@ spring-ai-advisors-vector-store - + org.springframework.ai spring-ai-pdf-document-reader + + + org.apache.pdfbox + pdfbox + 3.0.3 + + com.fasterxml.jackson.core diff --git a/backend/src/main/java/com/aiteacher/book/BookController.java b/backend/src/main/java/com/aiteacher/book/BookController.java index 0d2674a..c063152 100644 --- a/backend/src/main/java/com/aiteacher/book/BookController.java +++ b/backend/src/main/java/com/aiteacher/book/BookController.java @@ -1,5 +1,7 @@ package com.aiteacher.book; +import com.aiteacher.document.FigureEntity; +import com.aiteacher.document.FigureRepository; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.*; @@ -15,9 +17,11 @@ import java.util.UUID; public class BookController { private final BookService bookService; + private final FigureRepository figureRepository; - public BookController(BookService bookService) { + public BookController(BookService bookService, FigureRepository 
figureRepository) { this.bookService = bookService; + this.figureRepository = figureRepository; } @PostMapping(consumes = "multipart/form-data") @@ -46,6 +50,36 @@ public class BookController { return ResponseEntity.noContent().build(); } + @PostMapping("/{id}/reembed") + public ResponseEntity> reembed(@PathVariable UUID id) { + Book book = bookService.reembed(id); + return ResponseEntity.accepted().body(Map.of( + "bookId", book.getId(), + "status", BookStatus.PROCESSING.name() + )); + } + + @GetMapping("/{id}/figures") + public ResponseEntity> figures(@PathVariable UUID id) { + bookService.getById(id); // 404 if not found + List responses = figureRepository.findAllByBookId(id) + .stream() + .map(f -> toFigureResponse(id, f)) + .toList(); + return ResponseEntity.ok(responses); + } + + private FigureResponse toFigureResponse(UUID bookId, FigureEntity f) { + String filename = f.getImagePath().substring(f.getImagePath().lastIndexOf('/') + 1); + String imageUrl = "/api/v1/figures/" + bookId + "/" + filename; + return new FigureResponse( + f.getId(), f.getLabel(), f.getCaption(), + f.getFigureType().name(), f.getPage(), imageUrl, + f.getSectionId(), + null // section title not eagerly loaded here + ); + } + private Map toSummaryResponse(Book book) { return Map.of( "id", book.getId(), diff --git a/backend/src/main/java/com/aiteacher/book/BookEmbeddingService.java b/backend/src/main/java/com/aiteacher/book/BookEmbeddingService.java index 88df661..96b9db7 100644 --- a/backend/src/main/java/com/aiteacher/book/BookEmbeddingService.java +++ b/backend/src/main/java/com/aiteacher/book/BookEmbeddingService.java @@ -1,41 +1,75 @@ package com.aiteacher.book; +import com.aiteacher.document.*; +import com.aiteacher.figure.FigureStorageService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.document.Document; -import org.springframework.ai.reader.pdf.PagePdfDocumentReader; -import 
org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder; -import org.springframework.core.io.FileSystemResource; +import org.springframework.beans.factory.annotation.Value; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; import java.nio.file.Path; -import java.util.List; -import java.util.UUID; -import java.util.regex.Pattern; +import java.time.Instant; +import java.util.*; @Service public class BookEmbeddingService { private static final Logger log = LoggerFactory.getLogger(BookEmbeddingService.class); - // Pattern to detect diagram/figure captions - private static final Pattern CAPTION_PATTERN = - Pattern.compile("^(Figure|Fig\\.|Table|Diagram)\\s+[\\d.]+", Pattern.CASE_INSENSITIVE); - private final VectorStore vectorStore; private final BookRepository bookRepository; - public BookEmbeddingService(VectorStore vectorStore, BookRepository bookRepository) { + @Value("${app.embedding.batch-size:50}") + private int embeddingBatchSize; + + @Value("${app.embedding.batch-delay-ms:1000}") + private long embeddingBatchDelayMs; + private final PdfStructureParser pdfStructureParser; + private final FigureExtractionService figureExtractionService; + private final VisionDescriptionService visionDescriptionService; + private final TextChunkingService textChunkingService; + private final ChunkFigureRefService chunkFigureRefService; + private final SectionRepository sectionRepository; + private final ChapterRepository chapterRepository; + private final FigureRepository figureRepository; + private final ChunkFigureRefRepository chunkFigureRefRepository; + private final FigureStorageService figureStorageService; + + public BookEmbeddingService( + VectorStore vectorStore, + BookRepository bookRepository, + PdfStructureParser 
pdfStructureParser, + FigureExtractionService figureExtractionService, + VisionDescriptionService visionDescriptionService, + TextChunkingService textChunkingService, + ChunkFigureRefService chunkFigureRefService, + SectionRepository sectionRepository, + ChapterRepository chapterRepository, + FigureRepository figureRepository, + ChunkFigureRefRepository chunkFigureRefRepository, + FigureStorageService figureStorageService) { this.vectorStore = vectorStore; this.bookRepository = bookRepository; + this.pdfStructureParser = pdfStructureParser; + this.figureExtractionService = figureExtractionService; + this.visionDescriptionService = visionDescriptionService; + this.textChunkingService = textChunkingService; + this.chunkFigureRefService = chunkFigureRefService; + this.sectionRepository = sectionRepository; + this.chapterRepository = chapterRepository; + this.figureRepository = figureRepository; + this.chunkFigureRefRepository = chunkFigureRefRepository; + this.figureStorageService = figureStorageService; } @Async public void embedBook(UUID bookId, String bookTitle, Path pdfPath) { - log.info("Starting embedding for book {} ({})", bookId, bookTitle); + log.info("Starting image-aware embedding for book {} ({})", bookId, bookTitle); Book book = bookRepository.findById(bookId).orElse(null); if (book == null) { @@ -47,29 +81,68 @@ public class BookEmbeddingService { book.setStatus(BookStatus.PROCESSING); bookRepository.save(book); - PagePdfDocumentReader reader = new PagePdfDocumentReader( - new FileSystemResource(pdfPath.toFile()), - PdfDocumentReaderConfig.builder() - .withPagesPerDocument(1) - .build() - ); + // Step 1: Parse PDF into page-level sections persisted in Postgres + List sections = pdfStructureParser.parse(bookId, bookTitle, pdfPath); + String chapterId = bookId + "-ch1"; - List pages = reader.get(); - int pageCount = pages.size(); + // Step 2: Build and embed text chunks for all sections in batches + List allChunks = new ArrayList<>(); + for (SectionEntity 
section : sections) { + List chunks = textChunkingService.chunk(section, bookTitle); + allChunks.addAll(chunks); + } + embedInBatches(allChunks, bookId); + log.info("Embedded {} text chunks for book {}", allChunks.size(), bookId); - // Enrich metadata and tag diagram captions - List enriched = pages.stream() - .map(doc -> enrichDocument(doc, bookId.toString(), bookTitle)) - .toList(); + // Step 3: Extract images from the PDF, save to file store, persist FigureEntity + List figures = figureExtractionService.extract( + bookId, chapterId, sections, pdfPath); - vectorStore.add(enriched); + // Step 4: For each figure, generate vision description and embed caption + for (FigureEntity figure : figures) { + Path imagePath = figureStorageService.resolve(figure.getImagePath()); + String description = visionDescriptionService.describe( + imagePath, figure.getCaption()); + + // Use description as caption fallback if no caption was detected + if (figure.getCaption() == null || figure.getCaption().isBlank()) { + figure.setCaption(description); + figureRepository.save(figure); + } + + // Content for embedding = vision description + caption for maximum signal + String embeddingContent = description + + (figure.getCaption() != null ? 
"\n" + figure.getCaption() : ""); + + String embeddingId = UUID.randomUUID().toString(); + Map metadata = buildFigureMetadata(figure, bookTitle, embeddingId); + Document figureDoc = new Document(embeddingId, embeddingContent, metadata); + vectorStore.add(List.of(figureDoc)); + + figure.setCaptionEmbeddingId(UUID.fromString(embeddingId)); + figureRepository.save(figure); + } + log.info("Embedded {} figure captions for book {}", figures.size(), bookId); + + // Step 5: Link text chunks to figures via text references + for (SectionEntity section : sections) { + List sectionChunks = allChunks.stream() + .filter(d -> section.getId().equals(d.getMetadata().get("section_id"))) + .toList(); + List sectionFigures = figures.stream() + .filter(f -> section.getId().equals(f.getSectionId())) + .toList(); + chunkFigureRefService.linkChunksToFigures( + sectionChunks, sectionFigures, section.getPageStart()); + } book.setStatus(BookStatus.READY); - book.setPageCount(pageCount); - book.setProcessedAt(java.time.Instant.now()); + book.setPageCount(sections.size()); + book.setProcessedAt(Instant.now()); bookRepository.save(book); - log.info("Finished embedding book {} — {} pages", bookId, pageCount); + log.info("Finished embedding book {} — {} pages, {} figures", + bookId, sections.size(), figures.size()); } catch (Exception ex) { log.error("Failed to embed book {}", bookId, ex); @@ -79,40 +152,74 @@ public class BookEmbeddingService { } } - private Document enrichDocument(Document doc, String bookId, String bookTitle) { - String content = doc.getText(); - String chunkType = detectChunkType(content); + @Transactional + public void deleteBookChunks(UUID bookId) { + log.info("Deleting all data for book {}", bookId); + try { + // Delete chunk-figure refs (by figureId for this book) + List figureIds = figureRepository.findAllByBookId(bookId) + .stream().map(FigureEntity::getId).toList(); + if (!figureIds.isEmpty()) { + chunkFigureRefRepository.deleteByFigureIdIn(figureIds); + } - 
doc.getMetadata().put("book_id", bookId); - doc.getMetadata().put("book_title", bookTitle); - doc.getMetadata().put("chunk_type", chunkType); + // Delete figures from Postgres + figureRepository.deleteAllByBookId(bookId); - return doc; + // Delete figure files from disk + figureStorageService.deleteAll(bookId); + + // Delete sections and chapters from Postgres + sectionRepository.deleteAllByBookId(bookId); + chapterRepository.deleteAllByBookId(bookId); + + // Delete vector store entries (text chunks + figure embeddings) + FilterExpressionBuilder b = new FilterExpressionBuilder(); + vectorStore.delete(b.eq("book_id", bookId.toString()).build()); + + } catch (Exception ex) { + log.warn("Error during cleanup for book {}: {}", bookId, ex.getMessage()); + } } - private String detectChunkType(String content) { - if (content != null) { - for (String line : content.split("\\r?\\n")) { - if (CAPTION_PATTERN.matcher(line.trim()).find()) { - return "diagram"; + private void embedInBatches(List docs, UUID bookId) { + int total = docs.size(); + for (int i = 0; i < total; i += embeddingBatchSize) { + List batch = docs.subList(i, Math.min(i + embeddingBatchSize, total)); + vectorStore.add(batch); + int batchNum = i / embeddingBatchSize + 1; + int totalBatches = (total - 1) / embeddingBatchSize + 1; + log.debug("Embedded batch {}/{} for book {}", batchNum, totalBatches, bookId); + if (i + embeddingBatchSize < total) { + try { + Thread.sleep(embeddingBatchDelayMs); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + log.warn("Embedding batch sleep interrupted for book {}", bookId); } } } - return "text"; } - public void deleteBookChunks(UUID bookId) { - log.info("Deleting vector chunks for book {}", bookId); - try { - FilterExpressionBuilder b = new FilterExpressionBuilder(); - vectorStore.delete(b.eq("book_id", bookId.toString()).build()); - } catch (Exception ex) { - log.warn("Could not delete vector chunks for book {}: {}", bookId, ex.getMessage()); - } 
+ private Map buildFigureMetadata(FigureEntity figure, String bookTitle, + String embeddingId) { + Map m = new HashMap<>(); + m.put("type", "FIGURE"); + m.put("book_id", figure.getBookId().toString()); + m.put("book_title", bookTitle); + m.put("chapter_id", figure.getChapterId() != null ? figure.getChapterId() : ""); + m.put("section_id", figure.getSectionId() != null ? figure.getSectionId() : ""); + m.put("figure_id", figure.getId()); + m.put("figure_type", figure.getFigureType().name()); + m.put("image_path", figure.getImagePath()); + m.put("label", figure.getLabel() != null ? figure.getLabel() : ""); + m.put("page", figure.getPage()); + m.put("embedding_id", embeddingId); + return m; } - private String truncate(String message, int maxLength) { - if (message == null) return null; - return message.length() <= maxLength ? message : message.substring(0, maxLength); + private String truncate(String msg, int max) { + if (msg == null) return null; + return msg.length() <= max ? msg : msg.substring(0, max); } } diff --git a/backend/src/main/java/com/aiteacher/book/BookService.java b/backend/src/main/java/com/aiteacher/book/BookService.java index bda4c9a..d0dc829 100644 --- a/backend/src/main/java/com/aiteacher/book/BookService.java +++ b/backend/src/main/java/com/aiteacher/book/BookService.java @@ -1,11 +1,13 @@ package com.aiteacher.book; +import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.List; import java.util.NoSuchElementException; import java.util.UUID; @@ -15,10 +17,15 @@ public class BookService { private final BookRepository bookRepository; private final BookEmbeddingService bookEmbeddingService; + private final Path bookStoragePath; - public BookService(BookRepository bookRepository, BookEmbeddingService 
bookEmbeddingService) { + public BookService( + BookRepository bookRepository, + BookEmbeddingService bookEmbeddingService, + @Value("${app.figure-storage.base-path:./uploads}") String basePath) { this.bookRepository = bookRepository; this.bookEmbeddingService = bookEmbeddingService; + this.bookStoragePath = Paths.get(basePath).toAbsolutePath().normalize().resolve("books"); } public Book upload(MultipartFile file) throws IOException { @@ -28,20 +35,35 @@ public class BookService { } String title = deriveTitle(originalFilename); - Book book = new Book(title, originalFilename, file.getSize()); book = bookRepository.save(book); - // Write to a temp file so the async task can read it - Path tempFile = Files.createTempFile("aiteacher-", "-" + book.getId() + ".pdf"); - file.transferTo(tempFile.toFile()); + // Persist PDF in a stable location for potential re-embedding + Files.createDirectories(bookStoragePath); + Path pdfPath = bookStoragePath.resolve(book.getId() + ".pdf"); + file.transferTo(pdfPath.toFile()); UUID bookId = book.getId(); - Path pdfPath = tempFile; - String bookTitle = title; + bookEmbeddingService.embedBook(bookId, title, pdfPath); + return book; + } - bookEmbeddingService.embedBook(bookId, bookTitle, pdfPath); + public Book reembed(UUID id) { + Book book = bookRepository.findById(id) + .orElseThrow(() -> new NoSuchElementException("Book not found.")); + if (book.getStatus() == BookStatus.PROCESSING) { + throw new IllegalStateException("Book is already being processed."); + } + + Path pdfPath = bookStoragePath.resolve(id + ".pdf"); + if (!Files.exists(pdfPath)) { + throw new IllegalStateException( + "Original PDF not found. 
Please re-upload the book before re-embedding."); + } + + bookEmbeddingService.deleteBookChunks(id); + bookEmbeddingService.embedBook(id, book.getTitle(), pdfPath); return book; } @@ -63,14 +85,21 @@ public class BookService { } bookEmbeddingService.deleteBookChunks(id); + + // Delete the stored PDF + Path pdfPath = bookStoragePath.resolve(id + ".pdf"); + try { + Files.deleteIfExists(pdfPath); + } catch (IOException ex) { + // Non-fatal — log only + } + bookRepository.deleteById(id); } private String deriveTitle(String filename) { - // Strip .pdf extension and replace separators with spaces String name = filename.replaceAll("(?i)\\.pdf$", ""); name = name.replaceAll("[-_]", " "); - // Capitalise first letter if (!name.isEmpty()) { name = Character.toUpperCase(name.charAt(0)) + name.substring(1); } diff --git a/backend/src/main/java/com/aiteacher/book/FigureResponse.java b/backend/src/main/java/com/aiteacher/book/FigureResponse.java new file mode 100644 index 0000000..0babc38 --- /dev/null +++ b/backend/src/main/java/com/aiteacher/book/FigureResponse.java @@ -0,0 +1,12 @@ +package com.aiteacher.book; + +public record FigureResponse( + String figureId, + String label, + String caption, + String figureType, + int page, + String imageUrl, + String sectionId, + String sectionTitle +) {} diff --git a/backend/src/main/java/com/aiteacher/chat/ChatService.java b/backend/src/main/java/com/aiteacher/chat/ChatService.java index dadc511..1b1524d 100644 --- a/backend/src/main/java/com/aiteacher/chat/ChatService.java +++ b/backend/src/main/java/com/aiteacher/chat/ChatService.java @@ -3,22 +3,16 @@ package com.aiteacher.chat; import com.aiteacher.book.BookRepository; import com.aiteacher.book.BookStatus; import com.aiteacher.book.NoKnowledgeSourceException; +import com.aiteacher.document.FigureEntity; +import com.aiteacher.document.SectionEntity; +import com.aiteacher.retrieval.NeurosurgeryRetriever; +import com.aiteacher.retrieval.RetrievalResult; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; import org.springframework.ai.chat.client.ChatClient; -import org.springframework.ai.chat.client.advisor.vectorstore.QuestionAnswerAdvisor; -import org.springframework.ai.chat.model.ChatResponse; -import org.springframework.ai.document.Document; -import org.springframework.ai.vectorstore.SearchRequest; -import org.springframework.ai.vectorstore.VectorStore; import org.springframework.stereotype.Service; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.UUID; +import java.util.*; @Service public class ChatService { @@ -35,26 +29,28 @@ public class ChatService { - Build answers from what is present: procedures, conditions, techniques, and descriptions all contribute; combine them into a rich, structured response - Use clear structure: headings, bullet points, or numbered steps where appropriate to maximize clarity - Only say you cannot answer if the context is entirely unrelated to the question - - Cite sources for each major point (book title and page number from the context metadata) + - Cite sources for each major point (book title and page number from the context) + - When referencing diagrams or figures, cite them as [Fig. 
X, p.N] - Maintain continuity with the conversation history - Never fabricate clinical information not present in the context """; private final ChatClient chatClient; - private final VectorStore vectorStore; private final BookRepository bookRepository; private final ChatSessionRepository sessionRepository; private final MessageRepository messageRepository; + private final NeurosurgeryRetriever retriever; - public ChatService(ChatClient chatClient, VectorStore vectorStore, + public ChatService(ChatClient chatClient, BookRepository bookRepository, ChatSessionRepository sessionRepository, - MessageRepository messageRepository) { + MessageRepository messageRepository, + NeurosurgeryRetriever retriever) { this.chatClient = chatClient; - this.vectorStore = vectorStore; this.bookRepository = bookRepository; this.sessionRepository = sessionRepository; this.messageRepository = messageRepository; + this.retriever = retriever; } public ChatSession createSession(String topicId) { @@ -73,7 +69,11 @@ public class ChatService { ChatSession session = sessionRepository.findById(sessionId) .orElseThrow(() -> new NoSuchElementException("Session not found.")); - if (!bookRepository.existsByStatus(BookStatus.READY)) { + List readyBooks = bookRepository.findAll().stream() + .filter(b -> b.getStatus() == BookStatus.READY) + .toList(); + + if (readyBooks.isEmpty()) { throw new NoKnowledgeSourceException("No books are available as knowledge sources."); } @@ -81,27 +81,31 @@ public class ChatService { Message userMessage = new Message(sessionId, MessageRole.USER, userContent); messageRepository.save(userMessage); - // Build conversation history for context + // Build full question with conversation history List history = messageRepository.findBySessionIdOrderByCreatedAtAsc(sessionId); - - // Build the prompt with full conversation history as context String fullQuestion = buildQuestionWithHistory(history, userContent, session.getTopicId()); - var qaAdvisor = 
QuestionAnswerAdvisor.builder(vectorStore) - .searchRequest(SearchRequest.builder().similarityThreshold(0.5d).topK(6).build()) - .build(); - - ChatResponse response = chatClient.prompt() - .advisors(qaAdvisor) + // Retrieve context from all ready books (aggregate across books) + List allSections = new ArrayList<>(); + List allFigures = new ArrayList<>(); + for (com.aiteacher.book.Book book : readyBooks) { + RetrievalResult result = retriever.retrieve(fullQuestion, book.getId()); + allSections.addAll(result.parentSections()); + allFigures.addAll(result.figures()); + } + + // Build LLM prompt with section full texts and figure references + String contextPrompt = buildContextPrompt(fullQuestion, allSections, allFigures); + + String assistantContent = chatClient.prompt() .system(SYSTEM_PROMPT) - .user(fullQuestion) + .user(contextPrompt) .call() - .chatResponse(); + .content(); - String assistantContent = response.getResult().getOutput().getText(); - List> sources = extractSources(response); + // Build sources list with TEXT and FIGURE entries + List> sources = buildSources(allSections, allFigures); - // Persist assistant message Message assistantMessage = new Message(sessionId, MessageRole.ASSISTANT, assistantContent); assistantMessage.setSources(sources); return messageRepository.save(assistantMessage); @@ -118,24 +122,95 @@ public class ChatService { sessionRepository.deleteById(sessionId); } + // ------------------------------------------------------------------------- + // Private helpers + // ------------------------------------------------------------------------- + + private String buildContextPrompt(String question, + List sections, + List figures) { + StringBuilder sb = new StringBuilder(); + + if (!sections.isEmpty()) { + sb.append("CONTEXT:\n\n"); + for (SectionEntity section : sections) { + sb.append("[").append(section.getTitle()) + .append(", p.").append(section.getPageStart()).append("]\n"); + sb.append(section.getFullText()).append("\n\n"); + } + } + + 
if (!figures.isEmpty()) { + sb.append("AVAILABLE FIGURES:\n"); + for (FigureEntity figure : figures) { + sb.append("- ").append(figure.getLabel() != null ? figure.getLabel() : "Figure") + .append(" (p.").append(figure.getPage()).append("): ") + .append(figure.getCaption() != null ? figure.getCaption() : "") + .append("\n"); + } + sb.append("\nWhen referencing diagrams, cite them as [Fig. X, p.N].\n\n"); + } + + sb.append("QUESTION:\n").append(question); + return sb.toString(); + } + + private List> buildSources(List sections, + List figures) { + List> sources = new ArrayList<>(); + + for (SectionEntity section : sections) { + Map source = new LinkedHashMap<>(); + source.put("type", "TEXT"); + source.put("bookTitle", deriveTitleFromSection(section)); + source.put("page", section.getPageStart()); + source.put("chunkText", truncate(section.getFullText(), 500)); + sources.add(source); + } + + for (FigureEntity figure : figures) { + Map source = new LinkedHashMap<>(); + source.put("type", "FIGURE"); + source.put("bookTitle", bookRepository.findById(figure.getBookId()) + .map(com.aiteacher.book.Book::getTitle).orElse("Book")); + source.put("page", figure.getPage()); + source.put("figureId", figure.getId()); + source.put("label", figure.getLabel() != null ? figure.getLabel() : ""); + source.put("caption", figure.getCaption() != null ? 
figure.getCaption() : ""); + source.put("figureType", figure.getFigureType().name()); + // imageUrl assembled from relative path: figures/{bookId}/{filename} + String filename = figure.getImagePath().substring( + figure.getImagePath().lastIndexOf('/') + 1); + source.put("imageUrl", "/api/v1/figures/" + figure.getBookId() + "/" + filename); + sources.add(source); + } + + return sources; + } + + private String deriveTitleFromSection(SectionEntity section) { + if (section == null) return "Book"; + return bookRepository.findById(section.getBookId()) + .map(com.aiteacher.book.Book::getTitle) + .orElse("Book"); + } + private String buildQuestionWithHistory(List history, String currentQuestion, String topicId) { boolean hasTopic = topicId != null && !topicId.equals("free-form"); if (history.size() <= 1) { return hasTopic - ? String.format("[Context: This is a question about the neurosurgery topic '%s']\n%s", + ? String.format("[Context: question about neurosurgery topic '%s']\n%s", topicId, currentQuestion) : currentQuestion; } StringBuilder sb = new StringBuilder(); if (hasTopic) { - sb.append(String.format("[Context: This conversation is about the neurosurgery topic '%s']\n\n", - topicId)); + sb.append(String.format("[Context: conversation about '%s']\n\n", topicId)); } sb.append("Previous conversation:\n"); - // Include all messages except the last (which is the current user message just saved) for (int i = 0; i < history.size() - 1; i++) { Message msg = history.get(i); sb.append(msg.getRole().name()).append(": ").append(msg.getContent()).append("\n"); @@ -144,30 +219,8 @@ public class ChatService { return sb.toString(); } - private List> extractSources(ChatResponse response) { - List> sources = new ArrayList<>(); - - if (response.getMetadata() != null) { - Object retrieved = response.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS); - if (retrieved instanceof List docs) { - for (Object docObj : docs) { - if (docObj instanceof Document doc) { - Map metadata 
= doc.getMetadata(); - String bookTitle = (String) metadata.get("book_title"); - Object pageObj = metadata.get("page_number"); - Integer page = pageObj instanceof Number n ? n.intValue() : null; - if (bookTitle != null) { - Map source = new HashMap<>(); - source.put("bookTitle", bookTitle); - source.put("page", page); - source.put("chunkText", doc.getText()); - sources.add(source); - } - } - } - } - } - - return sources; + private String truncate(String text, int maxChars) { + if (text == null) return ""; + return text.length() <= maxChars ? text : text.substring(0, maxChars) + "…"; } } diff --git a/backend/src/main/java/com/aiteacher/config/FigureStorageConfig.java b/backend/src/main/java/com/aiteacher/config/FigureStorageConfig.java new file mode 100644 index 0000000..ee27799 --- /dev/null +++ b/backend/src/main/java/com/aiteacher/config/FigureStorageConfig.java @@ -0,0 +1,25 @@ +package com.aiteacher.config; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +import java.nio.file.Paths; + +@Configuration +public class FigureStorageConfig implements WebMvcConfigurer { + + private final String basePath; + + public FigureStorageConfig(@Value("${app.figure-storage.base-path:./uploads}") String basePath) { + this.basePath = Paths.get(basePath).toAbsolutePath().normalize().toString(); + } + + @Override + public void addResourceHandlers(ResourceHandlerRegistry registry) { + // Serve GET /api/v1/figures/** from the local file store + registry.addResourceHandler("/api/v1/figures/**") + .addResourceLocations("file:" + basePath + "/figures/"); + } +} diff --git a/backend/src/main/java/com/aiteacher/document/ChapterEntity.java b/backend/src/main/java/com/aiteacher/document/ChapterEntity.java new file mode 100644 index 0000000..419e55b --- 
/dev/null +++ b/backend/src/main/java/com/aiteacher/document/ChapterEntity.java @@ -0,0 +1,47 @@ +package com.aiteacher.document; + +import jakarta.persistence.*; +import java.time.Instant; +import java.util.UUID; + +@Entity +@Table(name = "chapter") +public class ChapterEntity { + + @Id + @Column(name = "id", length = 200) + private String id; + + @Column(name = "book_id", nullable = false) + private UUID bookId; + + @Column(name = "number", nullable = false) + private int number; + + @Column(name = "title", length = 500) + private String title; + + @Column(name = "page_start") + private Integer pageStart; + + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + public ChapterEntity() {} + + public ChapterEntity(String id, UUID bookId, int number, String title, Integer pageStart) { + this.id = id; + this.bookId = bookId; + this.number = number; + this.title = title; + this.pageStart = pageStart; + this.createdAt = Instant.now(); + } + + public String getId() { return id; } + public UUID getBookId() { return bookId; } + public int getNumber() { return number; } + public String getTitle() { return title; } + public Integer getPageStart() { return pageStart; } + public Instant getCreatedAt() { return createdAt; } +} diff --git a/backend/src/main/java/com/aiteacher/document/ChapterRepository.java b/backend/src/main/java/com/aiteacher/document/ChapterRepository.java new file mode 100644 index 0000000..9fddfa0 --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/ChapterRepository.java @@ -0,0 +1,9 @@ +package com.aiteacher.document; + +import org.springframework.data.jpa.repository.JpaRepository; + +import java.util.UUID; + +public interface ChapterRepository extends JpaRepository { + void deleteAllByBookId(UUID bookId); +} diff --git a/backend/src/main/java/com/aiteacher/document/ChunkFigureRefEntity.java b/backend/src/main/java/com/aiteacher/document/ChunkFigureRefEntity.java new file mode 100644 index 0000000..c1a082d --- 
/dev/null +++ b/backend/src/main/java/com/aiteacher/document/ChunkFigureRefEntity.java @@ -0,0 +1,58 @@ +package com.aiteacher.document; + +import jakarta.persistence.*; +import java.io.Serializable; +import java.util.Objects; +import java.util.UUID; + +@Entity +@Table(name = "chunk_figure_ref") +@IdClass(ChunkFigureRefEntity.PK.class) +public class ChunkFigureRefEntity { + + @Id + @Column(name = "chunk_id", nullable = false) + private UUID chunkId; + + @Id + @Column(name = "figure_id", nullable = false, length = 200) + private String figureId; + + @Column(name = "mention_page") + private Integer mentionPage; + + public ChunkFigureRefEntity() {} + + public ChunkFigureRefEntity(UUID chunkId, String figureId, Integer mentionPage) { + this.chunkId = chunkId; + this.figureId = figureId; + this.mentionPage = mentionPage; + } + + public UUID getChunkId() { return chunkId; } + public String getFigureId() { return figureId; } + public Integer getMentionPage() { return mentionPage; } + + public static class PK implements Serializable { + private UUID chunkId; + private String figureId; + + public PK() {} + public PK(UUID chunkId, String figureId) { + this.chunkId = chunkId; + this.figureId = figureId; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof PK pk)) return false; + return Objects.equals(chunkId, pk.chunkId) && Objects.equals(figureId, pk.figureId); + } + + @Override + public int hashCode() { + return Objects.hash(chunkId, figureId); + } + } +} diff --git a/backend/src/main/java/com/aiteacher/document/ChunkFigureRefRepository.java b/backend/src/main/java/com/aiteacher/document/ChunkFigureRefRepository.java new file mode 100644 index 0000000..a39ef6e --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/ChunkFigureRefRepository.java @@ -0,0 +1,18 @@ +package com.aiteacher.document; + +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; 
+import org.springframework.data.repository.query.Param; + +import java.util.List; +import java.util.UUID; + +public interface ChunkFigureRefRepository extends JpaRepository { + + @Query("SELECT r FROM ChunkFigureRefEntity r WHERE r.chunkId IN :chunkIds") + List findByChunkIdIn(@Param("chunkIds") List chunkIds); + + @Query("DELETE FROM ChunkFigureRefEntity r WHERE r.figureId IN :figureIds") + @org.springframework.data.jpa.repository.Modifying + void deleteByFigureIdIn(@Param("figureIds") List figureIds); +} diff --git a/backend/src/main/java/com/aiteacher/document/ChunkFigureRefService.java b/backend/src/main/java/com/aiteacher/document/ChunkFigureRefService.java new file mode 100644 index 0000000..c6e536a --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/ChunkFigureRefService.java @@ -0,0 +1,62 @@ +package com.aiteacher.document; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.document.Document; +import org.springframework.stereotype.Service; + +import java.util.List; +import java.util.UUID; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Scans chunk text for "Fig. X" and "Figure X" references and persists + * ChunkFigureRefEntity rows linking that chunk to its referenced figures. + */ +@Service +public class ChunkFigureRefService { + + private static final Logger log = LoggerFactory.getLogger(ChunkFigureRefService.class); + + // Matches: "Fig. 12-4", "Fig. 12.4", "Fig 12", "Figure 12-4", etc. + private static final Pattern REF_PATTERN = + Pattern.compile("(?i)\\b(Fig\\.?|Figure)\\s+(\\d+[\\-.\\d]*)"); + + private final ChunkFigureRefRepository refRepository; + + public ChunkFigureRefService(ChunkFigureRefRepository refRepository) { + this.refRepository = refRepository; + } + + /** + * For each text chunk, finds figure references and persists ChunkFigureRefEntity rows. 
+ */ + public void linkChunksToFigures(List chunks, List bookFigures, + int pageNum) { + if (bookFigures.isEmpty()) return; + + for (Document chunk : chunks) { + String chunkIdStr = chunk.getId(); + UUID chunkId; + try { + chunkId = UUID.fromString(chunkIdStr); + } catch (IllegalArgumentException ex) { + log.warn("Chunk has non-UUID id: {}", chunkIdStr); + continue; + } + + Matcher m = REF_PATTERN.matcher(chunk.getText()); + while (m.find()) { + String refNum = m.group(2).trim(); + // Find matching figure by label suffix + for (FigureEntity figure : bookFigures) { + if (figure.getLabel() != null && figure.getLabel().endsWith(refNum)) { + refRepository.save(new ChunkFigureRefEntity(chunkId, figure.getId(), pageNum)); + break; + } + } + } + } + } +} diff --git a/backend/src/main/java/com/aiteacher/document/FigureEntity.java b/backend/src/main/java/com/aiteacher/document/FigureEntity.java new file mode 100644 index 0000000..5cffc83 --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/FigureEntity.java @@ -0,0 +1,82 @@ +package com.aiteacher.document; + +import jakarta.persistence.*; +import java.time.Instant; +import java.util.UUID; + +@Entity +@Table(name = "figure") +public class FigureEntity { + + @Id + @Column(name = "id", length = 200) + private String id; + + @Column(name = "book_id", nullable = false) + private UUID bookId; + + @Column(name = "section_id", length = 200) + private String sectionId; + + @Column(name = "chapter_id", length = 200) + private String chapterId; + + @Column(name = "label", length = 100) + private String label; + + @Column(name = "caption", columnDefinition = "TEXT") + private String caption; + + @Enumerated(EnumType.STRING) + @Column(name = "figure_type", nullable = false, length = 50) + private FigureType figureType; + + @Column(name = "page", nullable = false) + private int page; + + @Column(name = "image_path", nullable = false, length = 1000) + private String imagePath; + + @Column(name = "caption_embedding_id") + 
private UUID captionEmbeddingId; + + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + public FigureEntity() {} + + public FigureEntity(String id, UUID bookId, String sectionId, String chapterId, + String label, String caption, FigureType figureType, + int page, String imagePath) { + this.id = id; + this.bookId = bookId; + this.sectionId = sectionId; + this.chapterId = chapterId; + this.label = label; + this.caption = caption; + this.figureType = figureType; + this.page = page; + this.imagePath = imagePath; + this.createdAt = Instant.now(); + } + + public String getId() { return id; } + public UUID getBookId() { return bookId; } + public String getSectionId() { return sectionId; } + public String getChapterId() { return chapterId; } + public String getLabel() { return label; } + public String getCaption() { return caption; } + public FigureType getFigureType() { return figureType; } + public int getPage() { return page; } + public String getImagePath() { return imagePath; } + public UUID getCaptionEmbeddingId() { return captionEmbeddingId; } + public Instant getCreatedAt() { return createdAt; } + + public void setCaptionEmbeddingId(UUID captionEmbeddingId) { + this.captionEmbeddingId = captionEmbeddingId; + } + + public void setCaption(String caption) { + this.caption = caption; + } +} diff --git a/backend/src/main/java/com/aiteacher/document/FigureExtractionService.java b/backend/src/main/java/com/aiteacher/document/FigureExtractionService.java new file mode 100644 index 0000000..a80ed52 --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/FigureExtractionService.java @@ -0,0 +1,135 @@ +package com.aiteacher.document; + +import com.aiteacher.figure.FigureStorageService; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import 
org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Extracts images from each PDF page using PDFBox. + * Images below the configured minimum size are skipped. + * Caption is detected by the "Fig." pattern in page text. + */ +@Service +public class FigureExtractionService { + + private static final Logger log = LoggerFactory.getLogger(FigureExtractionService.class); + + // Caption: line starting with "Fig." or "Figure" followed by a number + private static final Pattern CAPTION_PATTERN = + Pattern.compile("(?m)^(Fig\\.?\\s*\\d+[\\-.]?\\d*[^\\n]*)", Pattern.CASE_INSENSITIVE); + + // Figure label: "Fig. 12-4" or "Fig. 12.4" + private static final Pattern LABEL_PATTERN = + Pattern.compile("(?i)Fig\\.?\\s*(\\d+[\\-.\\d]*)"); + + private final FigureStorageService storageService; + private final FigureRepository figureRepository; + private final int minImageSizePx; + + public FigureExtractionService( + FigureStorageService storageService, + FigureRepository figureRepository, + @Value("${app.figure-storage.min-image-size-px:100}") int minImageSizePx) { + this.storageService = storageService; + this.figureRepository = figureRepository; + this.minImageSizePx = minImageSizePx; + } + + /** + * Extracts all qualifying images from the PDF for the given book. + * Returns persisted FigureEntity list (without vision descriptions — set later). 
+ */ + public List extract(UUID bookId, String chapterId, + List sections, Path pdfPath) { + List figures = new ArrayList<>(); + int figureCounter = 0; + + try (PDDocument doc = Loader.loadPDF(pdfPath.toFile())) { + for (SectionEntity section : sections) { + int pageIndex = section.getPageStart() - 1; // 0-based + if (pageIndex < 0 || pageIndex >= doc.getNumberOfPages()) continue; + + PDPage page = doc.getPage(pageIndex); + String pageText = section.getFullText(); + + try { + for (COSName name : page.getResources().getXObjectNames()) { + PDXObject xObject = page.getResources().getXObject(name); + if (!(xObject instanceof PDImageXObject image)) continue; + + BufferedImage bufferedImage = image.getImage(); + if (bufferedImage.getWidth() < minImageSizePx + || bufferedImage.getHeight() < minImageSizePx) { + continue; // skip decorative images + } + + figureCounter++; + String figureId = bookId + "-fig-" + pageIndex + "-" + figureCounter; + String caption = detectCaption(pageText); + String label = detectLabel(caption, figureCounter); + FigureType type = classifyType(caption, pageText); + + String imagePath = storageService.save(bookId, figureId, bufferedImage); + + FigureEntity figure = new FigureEntity( + figureId, bookId, section.getId(), chapterId, + label, caption, type, section.getPageStart(), imagePath + ); + figures.add(figureRepository.save(figure)); + } + } catch (IOException ex) { + log.warn("Failed to extract images from page {} of book {}: {}", + section.getPageStart(), bookId, ex.getMessage()); + } + } + } catch (IOException ex) { + log.error("Could not open PDF for image extraction, book {}", bookId, ex); + } + + log.info("Extracted {} figures for book {}", figures.size(), bookId); + return figures; + } + + private String detectCaption(String pageText) { + if (pageText == null) return null; + Matcher m = CAPTION_PATTERN.matcher(pageText); + return m.find() ? 
m.group(1).trim() : null; + } + + private String detectLabel(String caption, int counter) { + if (caption != null) { + Matcher m = LABEL_PATTERN.matcher(caption); + if (m.find()) return "Fig. " + m.group(1).trim(); + } + return "Fig. " + counter; + } + + private FigureType classifyType(String caption, String pageText) { + String combined = ((caption != null ? caption : "") + " " + (pageText != null ? pageText : "")).toLowerCase(); + if (combined.contains("mri") || combined.contains("ct ") || combined.contains("magnetic") + || combined.contains("tomography")) return FigureType.MRI_CT_SCAN; + if (combined.contains("intraoperative") || combined.contains("intra-op")) return FigureType.INTRAOPERATIVE_IMAGE; + if (caption != null && caption.toLowerCase().startsWith("table")) return FigureType.TABLE; + if (combined.contains("chart") || combined.contains("histogram") || combined.contains("graph")) + return FigureType.CHART; + if (combined.contains("photograph") || combined.contains("photo")) return FigureType.SURGICAL_PHOTOGRAPH; + return FigureType.ANATOMICAL_DIAGRAM; + } +} diff --git a/backend/src/main/java/com/aiteacher/document/FigureRepository.java b/backend/src/main/java/com/aiteacher/document/FigureRepository.java new file mode 100644 index 0000000..00413e0 --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/FigureRepository.java @@ -0,0 +1,11 @@ +package com.aiteacher.document; + +import org.springframework.data.jpa.repository.JpaRepository; + +import java.util.List; +import java.util.UUID; + +public interface FigureRepository extends JpaRepository { + List findAllByBookId(UUID bookId); + void deleteAllByBookId(UUID bookId); +} diff --git a/backend/src/main/java/com/aiteacher/document/FigureType.java b/backend/src/main/java/com/aiteacher/document/FigureType.java new file mode 100644 index 0000000..648ab7c --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/FigureType.java @@ -0,0 +1,10 @@ +package com.aiteacher.document; + +public enum 
FigureType { + ANATOMICAL_DIAGRAM, + SURGICAL_PHOTOGRAPH, + MRI_CT_SCAN, + TABLE, + CHART, + INTRAOPERATIVE_IMAGE +} diff --git a/backend/src/main/java/com/aiteacher/document/PdfStructureParser.java b/backend/src/main/java/com/aiteacher/document/PdfStructureParser.java new file mode 100644 index 0000000..930914f --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/PdfStructureParser.java @@ -0,0 +1,71 @@ +package com.aiteacher.document; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.reader.pdf.PagePdfDocumentReader; +import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; +import org.springframework.core.io.FileSystemResource; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +/** + * Parses a PDF into page-level SectionEntity records stored in Postgres. + * Each page becomes one section, grouped under a single chapter per book. 
+ */ +@Service +public class PdfStructureParser { + + private static final Logger log = LoggerFactory.getLogger(PdfStructureParser.class); + + private final ChapterRepository chapterRepository; + private final SectionRepository sectionRepository; + + public PdfStructureParser(ChapterRepository chapterRepository, + SectionRepository sectionRepository) { + this.chapterRepository = chapterRepository; + this.sectionRepository = sectionRepository; + } + + @Transactional + public List parse(UUID bookId, String bookTitle, Path pdfPath) { + log.info("Parsing PDF structure for book {}", bookId); + + // One chapter per book + String chapterId = bookId + "-ch1"; + ChapterEntity chapter = new ChapterEntity(chapterId, bookId, 1, bookTitle, 1); + chapterRepository.save(chapter); + + // One section per page + PagePdfDocumentReader reader = new PagePdfDocumentReader( + new FileSystemResource(pdfPath.toFile()), + PdfDocumentReaderConfig.builder().withPagesPerDocument(1).build() + ); + + List pages = reader.get(); + List sections = new ArrayList<>(); + + for (int i = 0; i < pages.size(); i++) { + int pageNum = i + 1; + String text = pages.get(i).getText(); + if (text == null || text.isBlank()) continue; + + String sectionId = bookId + "-p" + pageNum; + SectionEntity section = new SectionEntity( + sectionId, chapterId, bookId, + String.valueOf(pageNum), + "Page " + pageNum, + pageNum, pageNum, + text + ); + sections.add(sectionRepository.save(section)); + } + + log.info("Parsed {} sections for book {}", sections.size(), bookId); + return sections; + } +} diff --git a/backend/src/main/java/com/aiteacher/document/SectionEntity.java b/backend/src/main/java/com/aiteacher/document/SectionEntity.java new file mode 100644 index 0000000..eb07051 --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/SectionEntity.java @@ -0,0 +1,63 @@ +package com.aiteacher.document; + +import jakarta.persistence.*; +import java.time.Instant; +import java.util.UUID; + +@Entity +@Table(name = 
"section") +public class SectionEntity { + + @Id + @Column(name = "id", length = 200) + private String id; + + @Column(name = "chapter_id", nullable = false, length = 200) + private String chapterId; + + @Column(name = "book_id", nullable = false) + private UUID bookId; + + @Column(name = "number", length = 50) + private String number; + + @Column(name = "title", length = 500) + private String title; + + @Column(name = "page_start", nullable = false) + private int pageStart; + + @Column(name = "page_end", nullable = false) + private int pageEnd; + + @Column(name = "full_text", nullable = false, columnDefinition = "TEXT") + private String fullText; + + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + public SectionEntity() {} + + public SectionEntity(String id, String chapterId, UUID bookId, String number, + String title, int pageStart, int pageEnd, String fullText) { + this.id = id; + this.chapterId = chapterId; + this.bookId = bookId; + this.number = number; + this.title = title; + this.pageStart = pageStart; + this.pageEnd = pageEnd; + this.fullText = fullText; + this.createdAt = Instant.now(); + } + + public String getId() { return id; } + public String getChapterId() { return chapterId; } + public UUID getBookId() { return bookId; } + public String getNumber() { return number; } + public String getTitle() { return title; } + public int getPageStart() { return pageStart; } + public int getPageEnd() { return pageEnd; } + public String getFullText() { return fullText; } + public Instant getCreatedAt() { return createdAt; } +} diff --git a/backend/src/main/java/com/aiteacher/document/SectionRepository.java b/backend/src/main/java/com/aiteacher/document/SectionRepository.java new file mode 100644 index 0000000..4c23212 --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/SectionRepository.java @@ -0,0 +1,11 @@ +package com.aiteacher.document; + +import org.springframework.data.jpa.repository.JpaRepository; + +import 
java.util.List; +import java.util.UUID; + +public interface SectionRepository extends JpaRepository { + List findAllByBookId(UUID bookId); + void deleteAllByBookId(UUID bookId); +} diff --git a/backend/src/main/java/com/aiteacher/document/TextChunkingService.java b/backend/src/main/java/com/aiteacher/document/TextChunkingService.java new file mode 100644 index 0000000..776b28b --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/TextChunkingService.java @@ -0,0 +1,65 @@ +package com.aiteacher.document; + +import org.springframework.ai.document.Document; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +/** + * Splits a SectionEntity's full text into overlapping chunks for vector embedding. + * Target size: ~1800 characters (~450 tokens); overlap: 200 characters. + */ +@Service +public class TextChunkingService { + + private static final int TARGET_CHARS = 1800; + private static final int OVERLAP_CHARS = 200; + + public List chunk(SectionEntity section, String bookTitle) { + String text = section.getFullText(); + if (text == null || text.isBlank()) return List.of(); + + List windows = split(text); + List documents = new ArrayList<>(); + + for (int i = 0; i < windows.size(); i++) { + String chunkId = UUID.randomUUID().toString(); + Map metadata = buildMetadata(section, bookTitle, i, windows.size(), chunkId); + documents.add(new Document(chunkId, windows.get(i), metadata)); + } + return documents; + } + + private List split(String text) { + List windows = new ArrayList<>(); + int start = 0; + while (start < text.length()) { + int end = Math.min(start + TARGET_CHARS, text.length()); + windows.add(text.substring(start, end)); + if (end == text.length()) break; + start = end - OVERLAP_CHARS; + } + return windows; + } + + private Map buildMetadata(SectionEntity section, String bookTitle, + int index, int total, String chunkId) { + 
Map m = new HashMap<>(); + m.put("type", "TEXT"); + m.put("book_id", section.getBookId().toString()); + m.put("book_title", bookTitle); + m.put("chapter_id", section.getChapterId()); + m.put("section_id", section.getId()); + m.put("section_title", section.getTitle() != null ? section.getTitle() : ""); + m.put("page_start", section.getPageStart()); + m.put("page_end", section.getPageEnd()); + m.put("chunk_index", index); + m.put("total_chunks", total); + m.put("chunk_id", chunkId); + return m; + } +} diff --git a/backend/src/main/java/com/aiteacher/document/VisionDescriptionService.java b/backend/src/main/java/com/aiteacher/document/VisionDescriptionService.java new file mode 100644 index 0000000..4a3d18c --- /dev/null +++ b/backend/src/main/java/com/aiteacher/document/VisionDescriptionService.java @@ -0,0 +1,49 @@ +package com.aiteacher.document; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.core.io.FileSystemResource; +import org.springframework.stereotype.Service; +import org.springframework.util.MimeTypeUtils; + +import java.nio.file.Path; + +/** + * Generates a clinical text description for an extracted figure image + * using the OpenAI vision model via Spring AI ChatClient. + */ +@Service +public class VisionDescriptionService { + + private static final Logger log = LoggerFactory.getLogger(VisionDescriptionService.class); + + private static final String PROMPT = + "You are a neurosurgery educator. Provide a brief 2-3 sentence clinical description of " + + "this image. Focus on anatomical structures, surgical landmarks, labels, and clinical " + + "significance. If text or labels are visible, include them verbatim."; + + private final ChatClient chatClient; + + public VisionDescriptionService(ChatClient chatClient) { + this.chatClient = chatClient; + } + + /** + * Returns a description string. Falls back to the provided caption if vision fails. 
+ */ + public String describe(Path imagePath, String captionFallback) { + try { + return chatClient.prompt() + .user(u -> u + .text(PROMPT) + .media(MimeTypeUtils.IMAGE_PNG, new FileSystemResource(imagePath.toFile()))) + .call() + .content(); + } catch (Exception ex) { + log.warn("Vision description failed for {}: {} — using caption as fallback", + imagePath.getFileName(), ex.getMessage()); + return captionFallback != null ? captionFallback : "Figure"; + } + } +} diff --git a/backend/src/main/java/com/aiteacher/figure/FigureStorageService.java b/backend/src/main/java/com/aiteacher/figure/FigureStorageService.java new file mode 100644 index 0000000..4a257ee --- /dev/null +++ b/backend/src/main/java/com/aiteacher/figure/FigureStorageService.java @@ -0,0 +1,24 @@ +package com.aiteacher.figure; + +import java.awt.image.BufferedImage; +import java.nio.file.Path; +import java.util.UUID; + +public interface FigureStorageService { + + /** + * Saves an extracted image to the figure store and returns the relative path + * (relative to the configured base-path) stored in the database. + */ + String save(UUID bookId, String figureId, BufferedImage image); + + /** + * Resolves a stored relative path to an absolute filesystem path. + */ + Path resolve(String relativePath); + + /** + * Deletes all figure files for the given book. 
+ */ + void deleteAll(UUID bookId); +}
diff --git a/backend/src/main/java/com/aiteacher/figure/LocalFigureStorageService.java b/backend/src/main/java/com/aiteacher/figure/LocalFigureStorageService.java
new file mode 100644
index 0000000..48a3df3
--- /dev/null
+++ b/backend/src/main/java/com/aiteacher/figure/LocalFigureStorageService.java
@@ -0,0 +1,111 @@
+package com.aiteacher.figure;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Comparator;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Stores extracted figure images on the local filesystem as
+ * {@code <base-path>/figures/<bookId>/<figureId>.png}.
+ */
+@Service
+public class LocalFigureStorageService implements FigureStorageService {
+
+    private static final Logger log = LoggerFactory.getLogger(LocalFigureStorageService.class);
+
+    /** Absolute, normalized storage root; stored paths are resolved against it. */
+    private final Path basePath;
+
+    public LocalFigureStorageService(@Value("${app.figure-storage.base-path:./uploads}") String basePath) {
+        this.basePath = Paths.get(basePath).toAbsolutePath().normalize();
+    }
+
+    /**
+     * Writes {@code image} as a PNG file for the given book and figure.
+     *
+     * @return the storage-root-relative path of the written file ({@code figures/<bookId>/<figureId>.png})
+     * @throws RuntimeException if the directory or file cannot be written, or no PNG writer accepts the image
+     */
+    @Override
+    public String save(UUID bookId, String figureId, BufferedImage image) {
+        try {
+            Path dir = bookDirectory(bookId);
+            Files.createDirectories(dir);
+            String filename = figureId + ".png";
+            Path file = dir.resolve(filename);
+            // ImageIO.write signals "no suitable writer" by returning false, not by throwing;
+            // without this check the returned path would point at a file that was never written.
+            if (!ImageIO.write(image, "PNG", file.toFile())) {
+                throw new IOException("No PNG writer accepted image type " + image.getType());
+            }
+            // Return relative path for storage in DB
+            return "figures/" + bookId + "/" + filename;
+        } catch (IOException ex) {
+            throw new RuntimeException("Failed to save figure " + figureId, ex);
+        }
+    }
+
+    /**
+     * Resolves a storage-relative path (the form returned by {@link #save}) against the storage root.
+     *
+     * @throws IllegalArgumentException if the resolved path would lie outside the storage root
+     */
+    @Override
+    public Path resolve(String relativePath) {
+        Path resolved = basePath.resolve(relativePath).normalize();
+        // Rejects "../" sequences and absolute paths, both of which would escape basePath.
+        if (!resolved.startsWith(basePath)) {
+            throw new IllegalArgumentException("Path escapes figure storage root: " + relativePath);
+        }
+        return resolved;
+    }
+
+    /**
+     * Best-effort removal of every stored figure of a book; failures are logged, never thrown.
+     */
+    @Override
+    public void deleteAll(UUID bookId) {
+        Path dir = bookDirectory(bookId);
+        if (!Files.exists(dir)) return;
+
+        List<Path> deepestFirst;
+        try (var walk = Files.walk(dir)) {
+            // Reverse order puts children before their parent directory.
+            deepestFirst = walk.sorted(Comparator.reverseOrder()).toList();
+        } catch (IOException | UncheckedIOException ex) {
+            // Files.walk reports errors hit during traversal as UncheckedIOException.
+            log.warn("Could not fully delete figures for book {}: {}", bookId, ex.getMessage());
+            return;
+        }
+
+        int failures = 0;
+        for (Path path : deepestFirst) {
+            try {
+                Files.deleteIfExists(path);
+            } catch (IOException ex) {
+                failures++;
+                log.debug("Could not delete {}: {}", path, ex.getMessage());
+            }
+        }
+        if (failures > 0) {
+            log.warn("Could not fully delete figures for book {}: {} path(s) left behind", bookId, failures);
+        }
+    }
+
+    /** Directory holding every figure file of one book. */
+    private Path bookDirectory(UUID bookId) {
+        return basePath.resolve("figures").resolve(bookId.toString());
+    }
+}
diff --git a/backend/src/main/java/com/aiteacher/retrieval/NeurosurgeryRetriever.java b/backend/src/main/java/com/aiteacher/retrieval/NeurosurgeryRetriever.java
new file mode 100644
index 0000000..44ef22f
--- /dev/null
+++ b/backend/src/main/java/com/aiteacher/retrieval/NeurosurgeryRetriever.java
@@ -0,0 +1,134 @@
+package com.aiteacher.retrieval;
+
+import com.aiteacher.document.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.vectorstore.SearchRequest;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
+import org.springframework.stereotype.Service;
+
+import java.util.*;
+
+/**
+ * Dual-modality retriever: searches text chunks and figure captions independently,
+ * then expands text hits to their parent sections and merges linked figures.
+ */
+@Service
+public class NeurosurgeryRetriever {
+
+    private static final Logger log = LoggerFactory.getLogger(NeurosurgeryRetriever.class);
+
+    private static final int TEXT_TOP_K = 5;
+    private static final int FIGURE_TOP_K = 3;
+
+    private final VectorStore vectorStore;
+    private final SectionRepository sectionRepository;
+    private final FigureRepository figureRepository;
+    private final ChunkFigureRefRepository chunkFigureRefRepository;
+
+    public NeurosurgeryRetriever(VectorStore vectorStore,
+                                 SectionRepository sectionRepository,
+                                 FigureRepository figureRepository,
+                                 ChunkFigureRefRepository chunkFigureRefRepository) {
+        this.vectorStore = vectorStore;
+        this.sectionRepository = sectionRepository;
+        this.figureRepository = figureRepository;
+        this.chunkFigureRefRepository = chunkFigureRefRepository;
+    }
+
+    /**
+     * Runs the text and figure searches for one book and assembles the combined result.
+     *
+     * @param query  query text passed to the vector store
+     * @param bookId book whose chunks and figures are searched
+     * @return parent sections of the text hits, plus figures linked to those hits followed by
+     *         figures whose caption matched the query (deduplicated by figure id)
+     */
+    public RetrievalResult retrieve(String query, UUID bookId) {
+        // 1. Text chunk search
+        List<Document> textHits = search(query, bookId, "TEXT", TEXT_TOP_K);
+
+        // 2. Figure caption search (independent topK)
+        List<Document> figureHits = search(query, bookId, "FIGURE", FIGURE_TOP_K);
+
+        // 3. Expand text chunks to parent sections from Postgres
+        List<String> sectionIds = metadataIds(textHits, "section_id");
+        List<SectionEntity> sections = sectionIds.isEmpty()
+                ? List.of()
+                : sectionRepository.findAllById(sectionIds);
+
+        // 4. Fetch figures explicitly linked to retrieved chunks
+        List<UUID> chunkIds = textHits.stream()
+                .map(NeurosurgeryRetriever::chunkId)
+                .filter(Objects::nonNull)
+                .toList();
+        List<String> linkedFigureIds = chunkIds.isEmpty()
+                ? List.of()
+                : chunkFigureRefRepository.findByChunkIdIn(chunkIds)
+                        .stream().map(ChunkFigureRefEntity::getFigureId).distinct().toList();
+        List<FigureEntity> linkedFigures = findFigures(linkedFigureIds);
+
+        // 5. Collect figures from semantic figure search
+        List<FigureEntity> semanticFigures = findFigures(metadataIds(figureHits, "figure_id"));
+
+        // 6. Merge and deduplicate figures by figureId (linked figures take precedence)
+        Map<String, FigureEntity> merged = new LinkedHashMap<>();
+        linkedFigures.forEach(f -> merged.put(f.getId(), f));
+        semanticFigures.forEach(f -> merged.putIfAbsent(f.getId(), f));
+
+        log.debug("Retrieved {} sections, {} figures for query", sections.size(), merged.size());
+        return new RetrievalResult(sections, new ArrayList<>(merged.values()));
+    }
+
+    /** Similarity search restricted to one document type ("TEXT" or "FIGURE") of one book. */
+    private List<Document> search(String query, UUID bookId, String type, int topK) {
+        FilterExpressionBuilder b = new FilterExpressionBuilder();
+        List<Document> hits = vectorStore.similaritySearch(
+                SearchRequest.builder()
+                        .query(query)
+                        .topK(topK)
+                        .filterExpression(b.and(
+                                b.eq("type", type),
+                                b.eq("book_id", bookId.toString())
+                        ).build())
+                        .build()
+        );
+        // Defensive: treat a null result as "no hits" so the stream code below cannot NPE.
+        return hits != null ? hits : List.of();
+    }
+
+    /** Distinct, non-null values of one metadata key, in the order the hits were returned. */
+    private static List<String> metadataIds(List<Document> hits, String key) {
+        return hits.stream()
+                .map(d -> d.getMetadata().get(key))
+                .filter(Objects::nonNull)
+                .map(Object::toString)
+                .distinct()
+                .toList();
+    }
+
+    /** Parses a hit's id as the chunk UUID; returns null when the id is missing or not a UUID. */
+    private static UUID chunkId(Document hit) {
+        String id = hit.getId();
+        if (id == null) return null;
+        try {
+            return UUID.fromString(id);
+        } catch (IllegalArgumentException ex) {
+            log.debug("Skipping hit with non-UUID id {}", id);
+            return null;
+        }
+    }
+
+    /**
+     * Loads figures by id and returns them in the order of {@code ids}; findAllById gives no
+     * ordering guarantee, which would otherwise discard the order of the search hits.
+     */
+    private List<FigureEntity> findFigures(List<String> ids) {
+        if (ids.isEmpty()) return List.of();
+        Map<String, FigureEntity> byId = new HashMap<>();
+        figureRepository.findAllById(ids).forEach(f -> byId.put(f.getId(), f));
+        return ids.stream().map(byId::get).filter(Objects::nonNull).toList();
+    }
+}
diff --git a/backend/src/main/java/com/aiteacher/retrieval/RetrievalResult.java b/backend/src/main/java/com/aiteacher/retrieval/RetrievalResult.java
new file mode 100644
index 0000000..0fc4499
--- /dev/null
+++ b/backend/src/main/java/com/aiteacher/retrieval/RetrievalResult.java
@@ -0,0 +1,15 @@
+package com.aiteacher.retrieval;
+
+import com.aiteacher.document.FigureEntity;
+import com.aiteacher.document.SectionEntity;
+
+import java.util.List;
+
+/**
+ * Outcome of one retrieval: the parent sections of the matching text chunks and the
+ * figures collected for the same query.
+ */
+public record RetrievalResult(
+        List<SectionEntity> parentSections,
+        List<FigureEntity> figures
+) {}
diff --git a/backend/src/main/resources/application.yaml b/backend/src/main/resources/application.yaml index 9fc3ba8..47a929d 100644 --- a/backend/src/main/resources/application.yaml +++ 
b/backend/src/main/resources/application.yaml @@ -47,6 +47,16 @@ spring: max-size: 8 queue-capacity: 50 +logging: + level: + "[org.apache.pdfbox]": ERROR + app: auth: password: ${APP_PASSWORD:changeme} + figure-storage: + base-path: ${FIGURE_STORAGE_PATH:./uploads} + min-image-size-px: 100 + embedding: + batch-size: 20 + batch-delay-ms: 2000
diff --git a/backend/src/main/resources/db/migration/V4__document_hierarchy.sql b/backend/src/main/resources/db/migration/V4__document_hierarchy.sql
new file mode 100644
index 0000000..f41d156
--- /dev/null
+++ b/backend/src/main/resources/db/migration/V4__document_hierarchy.sql
@@ -0,0 +1,31 @@
+-- ============================================================
+-- V4: Document hierarchy — chapter and section tables
+-- Supports parent-child retrieval pattern for RAG precision.
+-- ============================================================
+
+CREATE TABLE IF NOT EXISTS chapter (
+    id          VARCHAR(200) PRIMARY KEY,
+    book_id     UUID         NOT NULL REFERENCES book(id) ON DELETE CASCADE,
+    number      INT          NOT NULL DEFAULT 1,
+    title       VARCHAR(500),
+    page_start  INT,
+    created_at  TIMESTAMPTZ  NOT NULL DEFAULT now()
+);
+
+CREATE TABLE IF NOT EXISTS section (
+    id          VARCHAR(200) PRIMARY KEY,
+    chapter_id  VARCHAR(200) NOT NULL REFERENCES chapter(id) ON DELETE CASCADE,
+    book_id     UUID         NOT NULL REFERENCES book(id) ON DELETE CASCADE,
+    number      VARCHAR(50),
+    title       VARCHAR(500),
+    page_start  INT          NOT NULL,
+    page_end    INT          NOT NULL,
+    full_text   TEXT         NOT NULL,
+    created_at  TIMESTAMPTZ  NOT NULL DEFAULT now()
+);
+
+-- PostgreSQL does not index foreign-key columns automatically; these indexes keep the
+-- ON DELETE CASCADE actions from book and chapter from scanning the child tables.
+CREATE INDEX IF NOT EXISTS idx_chapter_book    ON chapter(book_id);
+CREATE INDEX IF NOT EXISTS idx_section_book    ON section(book_id);
+CREATE INDEX IF NOT EXISTS idx_section_chapter ON section(chapter_id);
diff --git a/backend/src/main/resources/db/migration/V5__figures_and_refs.sql b/backend/src/main/resources/db/migration/V5__figures_and_refs.sql
new file mode 100644
index 0000000..6960f1e
--- /dev/null
+++ b/backend/src/main/resources/db/migration/V5__figures_and_refs.sql
@@ -0,0 +1,38 @@
+-- ============================================================
+-- V5: Figures and chunk-to-figure reference table
+-- figure: metadata + file path for each extracted image
+-- chunk_figure_ref: links vector-store chunks to figures
+-- ============================================================
+
+CREATE TABLE IF NOT EXISTS figure (
+    id                    VARCHAR(200)  PRIMARY KEY,
+    book_id               UUID          NOT NULL REFERENCES book(id) ON DELETE CASCADE,
+    section_id            VARCHAR(200)  REFERENCES section(id) ON DELETE SET NULL,
+    chapter_id            VARCHAR(200)  REFERENCES chapter(id) ON DELETE SET NULL,
+    label                 VARCHAR(100),
+    caption               TEXT,
+    figure_type           VARCHAR(50)   NOT NULL,
+    page                  INT           NOT NULL,
+    image_path            VARCHAR(1000) NOT NULL,
+    caption_embedding_id  UUID,
+    created_at            TIMESTAMPTZ   NOT NULL DEFAULT now()
+);
+
+-- chunk_id carries no foreign key: it identifies a vector-store chunk, not a row of these tables.
+CREATE TABLE IF NOT EXISTS chunk_figure_ref (
+    chunk_id      UUID         NOT NULL,
+    figure_id     VARCHAR(200) NOT NULL REFERENCES figure(id) ON DELETE CASCADE,
+    mention_page  INT,
+    PRIMARY KEY (chunk_id, figure_id)
+);
+
+-- Foreign-key columns are not indexed automatically in PostgreSQL; without these, every
+-- cascading delete / SET NULL from book, section, chapter or figure scans the child table.
+CREATE INDEX IF NOT EXISTS idx_figure_book    ON figure(book_id);
+CREATE INDEX IF NOT EXISTS idx_figure_section ON figure(section_id);
+CREATE INDEX IF NOT EXISTS idx_figure_chapter ON figure(chapter_id);
+CREATE INDEX IF NOT EXISTS idx_cfr_figure     ON chunk_figure_ref(figure_id);
+
+-- NOTE(review): chunk_id is the leading column of the primary key, so this index adds no
+-- lookup capability beyond the PK index; kept for compatibility, candidate for removal.
+CREATE INDEX IF NOT EXISTS idx_cfr_chunk      ON chunk_figure_ref(chunk_id);
diff --git a/frontend/src/components/ChatMessage.vue b/frontend/src/components/ChatMessage.vue index 4f3707c..a65e1c6 100644 --- a/frontend/src/components/ChatMessage.vue +++ b/frontend/src/components/ChatMessage.vue @@ -5,22 +5,47 @@
{{ message.content }}
- +
Sources:
+
-
- 📖 +
+ 📖 {{ source.bookTitle }} p. {{ source.page }}
{{ source.chunkText }}
+ + +
+
+ 🖼️ + {{ source.label || 'Figure' }} + p. {{ source.page }} + {{ formatFigureType(source.figureType) }} +
+
{{ source.caption }}
+
+ +
+
@@ -32,7 +57,7 @@