first implementation - image/drawing integration

This commit is contained in:
Adrien
2026-04-04 12:56:56 +02:00
parent fc5b22fba1
commit 5acfdd33c1
42 changed files with 2854 additions and 151 deletions
@@ -1,5 +1,7 @@
package com.aiteacher.book;
import com.aiteacher.document.FigureEntity;
import com.aiteacher.document.FigureRepository;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
@@ -15,9 +17,11 @@ import java.util.UUID;
public class BookController {
private final BookService bookService;
private final FigureRepository figureRepository;
public BookController(BookService bookService) {
/**
 * Creates the controller with the collaborators it delegates to.
 *
 * @param bookService      service handling book operations
 * @param figureRepository repository queried for the figures of a book
 */
public BookController(BookService bookService, FigureRepository figureRepository) {
    this.figureRepository = figureRepository;
    this.bookService = bookService;
}
@PostMapping(consumes = "multipart/form-data")
@@ -46,6 +50,36 @@ public class BookController {
return ResponseEntity.noContent().build();
}
/**
 * Requests re-embedding of a book and answers with 202 Accepted.
 *
 * <p>The response body carries the book id and always reports the status
 * {@code PROCESSING}, independent of the status held by the returned entity.
 *
 * @param id identifier of the book to re-embed
 * @return 202 response with {@code bookId} and {@code status} entries
 */
@PostMapping("/{id}/reembed")
public ResponseEntity<Map<String, Object>> reembed(@PathVariable UUID id) {
    Book reembedded = bookService.reembed(id);
    Map<String, Object> body = Map.of(
            "bookId", reembedded.getId(),
            "status", BookStatus.PROCESSING.name());
    return ResponseEntity.accepted().body(body);
}
/**
 * Lists the figures stored for a book.
 *
 * @param id identifier of the book
 * @return 200 response whose body is one {@link FigureResponse} per stored figure
 */
@GetMapping("/{id}/figures")
public ResponseEntity<List<FigureResponse>> figures(@PathVariable UUID id) {
    // Lookup is performed only for its failure path: an unknown book must yield 404.
    bookService.getById(id);
    return ResponseEntity.ok(
            figureRepository.findAllByBookId(id)
                    .stream()
                    .map(figure -> toFigureResponse(id, figure))
                    .toList());
}
/**
 * Maps a persisted figure to its API representation.
 *
 * <p>The image URL is {@code /api/v1/figures/{bookId}/{filename}}, where the filename is
 * the last segment of the stored image path. Both {@code /} and {@code \} are accepted as
 * separators so a path written with platform separators still yields a bare filename.
 * A figure without a stored image path gets a {@code null} image URL instead of failing
 * the whole listing with a {@link NullPointerException}.
 *
 * @param bookId identifier of the book the figure belongs to
 * @param f      figure entity to convert
 * @return the response record; {@code sectionTitle} is always {@code null} here
 */
private FigureResponse toFigureResponse(UUID bookId, FigureEntity f) {
    String imagePath = f.getImagePath();
    String imageUrl = null;
    if (imagePath != null && !imagePath.isBlank()) {
        int lastSeparator = Math.max(imagePath.lastIndexOf('/'), imagePath.lastIndexOf('\\'));
        String filename = imagePath.substring(lastSeparator + 1);
        imageUrl = "/api/v1/figures/" + bookId + "/" + filename;
    }
    return new FigureResponse(
            f.getId(), f.getLabel(), f.getCaption(),
            f.getFigureType().name(), f.getPage(), imageUrl,
            f.getSectionId(),
            null // section title not eagerly loaded here
    );
}
private Map<String, Object> toSummaryResponse(Book book) {
return Map.of(
"id", book.getId(),
@@ -1,41 +1,75 @@
package com.aiteacher.book;
import com.aiteacher.document.*;
import com.aiteacher.figure.FigureStorageService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
import org.springframework.core.io.FileSystemResource;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.nio.file.Path;
import java.util.List;
import java.util.UUID;
import java.util.regex.Pattern;
import java.time.Instant;
import java.util.*;
@Service
public class BookEmbeddingService {
private static final Logger log = LoggerFactory.getLogger(BookEmbeddingService.class);
// Pattern to detect diagram/figure captions
private static final Pattern CAPTION_PATTERN =
Pattern.compile("^(Figure|Fig\\.|Table|Diagram)\\s+[\\d.]+", Pattern.CASE_INSENSITIVE);
private final VectorStore vectorStore;
private final BookRepository bookRepository;
public BookEmbeddingService(VectorStore vectorStore, BookRepository bookRepository) {
@Value("${app.embedding.batch-size:50}")
private int embeddingBatchSize;
@Value("${app.embedding.batch-delay-ms:1000}")
private long embeddingBatchDelayMs;
private final PdfStructureParser pdfStructureParser;
private final FigureExtractionService figureExtractionService;
private final VisionDescriptionService visionDescriptionService;
private final TextChunkingService textChunkingService;
private final ChunkFigureRefService chunkFigureRefService;
private final SectionRepository sectionRepository;
private final ChapterRepository chapterRepository;
private final FigureRepository figureRepository;
private final ChunkFigureRefRepository chunkFigureRefRepository;
private final FigureStorageService figureStorageService;
/**
 * Wires the embedding pipeline with all of its collaborators.
 *
 * @param vectorStore              vector store receiving the embedded documents
 * @param bookRepository           repository for book entities
 * @param pdfStructureParser       parser turning a PDF into sections
 * @param figureExtractionService  service extracting figures from a PDF
 * @param visionDescriptionService service describing figure images
 * @param textChunkingService      service splitting sections into chunks
 * @param chunkFigureRefService    service linking chunks to figures
 * @param sectionRepository        repository for section entities
 * @param chapterRepository        repository for chapter entities
 * @param figureRepository         repository for figure entities
 * @param chunkFigureRefRepository repository for chunk-figure references
 * @param figureStorageService     storage service for figure image files
 */
public BookEmbeddingService(
        VectorStore vectorStore,
        BookRepository bookRepository,
        PdfStructureParser pdfStructureParser,
        FigureExtractionService figureExtractionService,
        VisionDescriptionService visionDescriptionService,
        TextChunkingService textChunkingService,
        ChunkFigureRefService chunkFigureRefService,
        SectionRepository sectionRepository,
        ChapterRepository chapterRepository,
        FigureRepository figureRepository,
        ChunkFigureRefRepository chunkFigureRefRepository,
        FigureStorageService figureStorageService) {
    // Stores
    this.vectorStore = vectorStore;
    this.figureStorageService = figureStorageService;

    // Repositories
    this.bookRepository = bookRepository;
    this.chapterRepository = chapterRepository;
    this.sectionRepository = sectionRepository;
    this.figureRepository = figureRepository;
    this.chunkFigureRefRepository = chunkFigureRefRepository;

    // Processing services
    this.pdfStructureParser = pdfStructureParser;
    this.textChunkingService = textChunkingService;
    this.figureExtractionService = figureExtractionService;
    this.visionDescriptionService = visionDescriptionService;
    this.chunkFigureRefService = chunkFigureRefService;
}
@Async
public void embedBook(UUID bookId, String bookTitle, Path pdfPath) {
log.info("Starting embedding for book {} ({})", bookId, bookTitle);
log.info("Starting image-aware embedding for book {} ({})", bookId, bookTitle);
Book book = bookRepository.findById(bookId).orElse(null);
if (book == null) {
@@ -47,29 +81,68 @@ public class BookEmbeddingService {
book.setStatus(BookStatus.PROCESSING);
bookRepository.save(book);
PagePdfDocumentReader reader = new PagePdfDocumentReader(
new FileSystemResource(pdfPath.toFile()),
PdfDocumentReaderConfig.builder()
.withPagesPerDocument(1)
.build()
);
// Step 1: Parse PDF into page-level sections persisted in Postgres
List<SectionEntity> sections = pdfStructureParser.parse(bookId, bookTitle, pdfPath);
String chapterId = bookId + "-ch1";
List<Document> pages = reader.get();
int pageCount = pages.size();
// Step 2: Build and embed text chunks for all sections in batches
List<Document> allChunks = new ArrayList<>();
for (SectionEntity section : sections) {
List<Document> chunks = textChunkingService.chunk(section, bookTitle);
allChunks.addAll(chunks);
}
embedInBatches(allChunks, bookId);
log.info("Embedded {} text chunks for book {}", allChunks.size(), bookId);
// Enrich metadata and tag diagram captions
List<Document> enriched = pages.stream()
.map(doc -> enrichDocument(doc, bookId.toString(), bookTitle))
.toList();
// Step 3: Extract images from the PDF, save to file store, persist FigureEntity
List<FigureEntity> figures = figureExtractionService.extract(
bookId, chapterId, sections, pdfPath);
vectorStore.add(enriched);
// Step 4: For each figure, generate vision description and embed caption
for (FigureEntity figure : figures) {
Path imagePath = figureStorageService.resolve(figure.getImagePath());
String description = visionDescriptionService.describe(
imagePath, figure.getCaption());
// Use description as caption fallback if no caption was detected
if (figure.getCaption() == null || figure.getCaption().isBlank()) {
figure.setCaption(description);
figureRepository.save(figure);
}
// Content for embedding = vision description + caption for maximum signal
String embeddingContent = description
+ (figure.getCaption() != null ? "\n" + figure.getCaption() : "");
String embeddingId = UUID.randomUUID().toString();
Map<String, Object> metadata = buildFigureMetadata(figure, bookTitle, embeddingId);
Document figureDoc = new Document(embeddingId, embeddingContent, metadata);
vectorStore.add(List.of(figureDoc));
figure.setCaptionEmbeddingId(UUID.fromString(embeddingId));
figureRepository.save(figure);
}
log.info("Embedded {} figure captions for book {}", figures.size(), bookId);
// Step 5: Link text chunks to figures via text references
for (SectionEntity section : sections) {
List<Document> sectionChunks = allChunks.stream()
.filter(d -> section.getId().equals(d.getMetadata().get("section_id")))
.toList();
List<FigureEntity> sectionFigures = figures.stream()
.filter(f -> section.getId().equals(f.getSectionId()))
.toList();
chunkFigureRefService.linkChunksToFigures(
sectionChunks, sectionFigures, section.getPageStart());
}
book.setStatus(BookStatus.READY);
book.setPageCount(pageCount);
book.setProcessedAt(java.time.Instant.now());
book.setPageCount(sections.size());
book.setProcessedAt(Instant.now());
bookRepository.save(book);
log.info("Finished embedding book {} — {} pages", bookId, pageCount);
log.info("Finished embedding book {} — {} pages, {} figures",
bookId, sections.size(), figures.size());
} catch (Exception ex) {
log.error("Failed to embed book {}", bookId, ex);
@@ -79,40 +152,74 @@ public class BookEmbeddingService {
}
}
private Document enrichDocument(Document doc, String bookId, String bookTitle) {
String content = doc.getText();
String chunkType = detectChunkType(content);
/**
 * Removes everything derived from a book: chunk-figure references, figure rows,
 * figure files, sections, chapters and the book's vector store entries.
 *
 * <p>Cleanup is best-effort: any exception is logged (with its stack trace) and not
 * rethrown, so a failure in one step skips the remaining steps.
 *
 * <p>NOTE(review): exceptions are caught inside a {@code @Transactional} method, so the
 * transaction is not rolled back by this method itself, and the file deletion is not
 * transactional at all — confirm that partial cleanup is acceptable.
 *
 * @param bookId identifier of the book whose derived data is deleted
 */
@Transactional
public void deleteBookChunks(UUID bookId) {
    log.info("Deleting all data for book {}", bookId);
    try {
        // Chunk-figure refs are deleted by figure id, so collect the ids before the
        // figure rows themselves are removed.
        List<String> figureIds = figureRepository.findAllByBookId(bookId)
                .stream()
                .map(FigureEntity::getId)
                .toList();
        if (!figureIds.isEmpty()) {
            chunkFigureRefRepository.deleteByFigureIdIn(figureIds);
        }

        // Delete figures from Postgres
        figureRepository.deleteAllByBookId(bookId);

        // Delete figure files from disk
        figureStorageService.deleteAll(bookId);

        // Delete sections and chapters from Postgres
        sectionRepository.deleteAllByBookId(bookId);
        chapterRepository.deleteAllByBookId(bookId);

        // Delete vector store entries (text chunks + figure embeddings)
        FilterExpressionBuilder b = new FilterExpressionBuilder();
        vectorStore.delete(b.eq("book_id", bookId.toString()).build());
    } catch (Exception ex) {
        // Trailing throwable argument makes the logger emit the stack trace as well.
        log.warn("Error during cleanup for book {}: {}", bookId, ex.getMessage(), ex);
    }
}
private String detectChunkType(String content) {
if (content != null) {
for (String line : content.split("\\r?\\n")) {
if (CAPTION_PATTERN.matcher(line.trim()).find()) {
return "diagram";
/**
 * Adds documents to the vector store in consecutive batches, pausing between batches.
 *
 * <p>The batch size comes from {@code embeddingBatchSize} and is clamped to at least 1,
 * because a configured value of zero or less would otherwise never advance the loop.
 * The pause comes from {@code embeddingBatchDelayMs}; non-positive values disable it.
 * No pause follows the final batch. If the pause is interrupted, the interrupt flag is
 * restored and embedding continues with the remaining batches.
 *
 * @param docs   documents to embed; an empty list is a no-op
 * @param bookId book identifier, used for log output only
 */
private void embedInBatches(List<Document> docs, UUID bookId) {
    int total = docs.size();
    if (total == 0) {
        return;
    }
    int batchSize = Math.max(1, embeddingBatchSize);
    int totalBatches = (total - 1) / batchSize + 1;

    int start = 0;
    int batchNum = 0;
    while (start < total) {
        // Widen to long so a very large batch size cannot overflow the end index.
        int end = (int) Math.min((long) start + batchSize, total);
        vectorStore.add(docs.subList(start, end));
        batchNum++;
        log.debug("Embedded batch {}/{} for book {}", batchNum, totalBatches, bookId);

        if (end < total && embeddingBatchDelayMs > 0) {
            try {
                Thread.sleep(embeddingBatchDelayMs);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                log.warn("Embedding batch sleep interrupted for book {}", bookId);
            }
        }
        start = end;
    }
}
public void deleteBookChunks(UUID bookId) {
log.info("Deleting vector chunks for book {}", bookId);
try {
FilterExpressionBuilder b = new FilterExpressionBuilder();
vectorStore.delete(b.eq("book_id", bookId.toString()).build());
} catch (Exception ex) {
log.warn("Could not delete vector chunks for book {}: {}", bookId, ex.getMessage());
}
/**
 * Assembles the metadata map stored alongside a figure's embedding document.
 *
 * <p>Chapter id, section id and label are replaced by an empty string when absent;
 * all other values are copied from the figure as they are.
 *
 * @param figure      figure the embedding belongs to
 * @param bookTitle   title of the owning book
 * @param embeddingId identifier of the embedding document
 * @return a mutable map of metadata entries
 */
private Map<String, Object> buildFigureMetadata(FigureEntity figure, String bookTitle,
                                                String embeddingId) {
    Map<String, Object> metadata = new HashMap<>();

    // Identity of the embedding and the figure it describes
    metadata.put("type", "FIGURE");
    metadata.put("embedding_id", embeddingId);
    metadata.put("figure_id", figure.getId());
    metadata.put("figure_type", figure.getFigureType().name());
    metadata.put("label", Objects.requireNonNullElse(figure.getLabel(), ""));

    // Location of the figure within the book
    metadata.put("book_id", figure.getBookId().toString());
    metadata.put("book_title", bookTitle);
    metadata.put("chapter_id", Objects.requireNonNullElse(figure.getChapterId(), ""));
    metadata.put("section_id", Objects.requireNonNullElse(figure.getSectionId(), ""));
    metadata.put("page", figure.getPage());
    metadata.put("image_path", figure.getImagePath());

    return metadata;
}
private String truncate(String message, int maxLength) {
if (message == null) return null;
return message.length() <= maxLength ? message : message.substring(0, maxLength);
private String truncate(String msg, int max) {
if (msg == null) return null;
return msg.length() <= max ? msg : msg.substring(0, max);
}
}
@@ -1,11 +1,13 @@
package com.aiteacher.book;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.UUID;
@@ -15,10 +17,15 @@ public class BookService {
private final BookRepository bookRepository;
private final BookEmbeddingService bookEmbeddingService;
private final Path bookStoragePath;
public BookService(BookRepository bookRepository, BookEmbeddingService bookEmbeddingService) {
/**
 * Creates the service and derives the directory in which uploaded PDFs are kept.
 *
 * <p>The PDF directory is the {@code books} sub-directory of the absolute, normalized
 * base path.
 *
 * @param bookRepository       repository for book entities
 * @param bookEmbeddingService service that embeds and cleans up book data
 * @param basePath             base storage path, read from
 *                             {@code app.figure-storage.base-path} (default {@code ./uploads})
 */
public BookService(
        BookRepository bookRepository,
        BookEmbeddingService bookEmbeddingService,
        @Value("${app.figure-storage.base-path:./uploads}") String basePath) {
    Path storageRoot = Paths.get(basePath).toAbsolutePath().normalize();
    this.bookStoragePath = storageRoot.resolve("books");
    this.bookEmbeddingService = bookEmbeddingService;
    this.bookRepository = bookRepository;
}
public Book upload(MultipartFile file) throws IOException {
@@ -28,20 +35,35 @@ public class BookService {
}
String title = deriveTitle(originalFilename);
Book book = new Book(title, originalFilename, file.getSize());
book = bookRepository.save(book);
// Write to a temp file so the async task can read it
Path tempFile = Files.createTempFile("aiteacher-", "-" + book.getId() + ".pdf");
file.transferTo(tempFile.toFile());
// Persist PDF in a stable location for potential re-embedding
Files.createDirectories(bookStoragePath);
Path pdfPath = bookStoragePath.resolve(book.getId() + ".pdf");
file.transferTo(pdfPath.toFile());
UUID bookId = book.getId();
Path pdfPath = tempFile;
String bookTitle = title;
bookEmbeddingService.embedBook(bookId, title, pdfPath);
return book;
}
bookEmbeddingService.embedBook(bookId, bookTitle, pdfPath);
/**
 * Discards a book's derived data and starts embedding it again from the stored PDF.
 *
 * <p>The book is marked {@code PROCESSING} and saved before the work is handed off, so
 * the "already being processed" guard also rejects a second request that arrives before
 * the embedding task has started, and the returned entity matches what is reported to
 * the client. If cleanup or the hand-off throws, the previous status is restored so the
 * book does not stay blocked in {@code PROCESSING}.
 *
 * @param id identifier of the book to re-embed
 * @return the saved book, now in status {@code PROCESSING}
 * @throws NoSuchElementException if no book with this id exists
 * @throws IllegalStateException  if the book is already being processed or its stored
 *                                PDF is missing
 */
public Book reembed(UUID id) {
    Book book = bookRepository.findById(id)
            .orElseThrow(() -> new NoSuchElementException("Book not found."));

    if (book.getStatus() == BookStatus.PROCESSING) {
        throw new IllegalStateException("Book is already being processed.");
    }

    Path pdfPath = bookStoragePath.resolve(id + ".pdf");
    if (!Files.exists(pdfPath)) {
        throw new IllegalStateException(
                "Original PDF not found. Please re-upload the book before re-embedding.");
    }

    BookStatus previousStatus = book.getStatus();
    book.setStatus(BookStatus.PROCESSING);
    Book processing = bookRepository.save(book);

    try {
        bookEmbeddingService.deleteBookChunks(id);
        bookEmbeddingService.embedBook(id, processing.getTitle(), pdfPath);
    } catch (RuntimeException ex) {
        // Hand-off failed: undo the status change so a later retry is not rejected.
        processing.setStatus(previousStatus);
        bookRepository.save(processing);
        throw ex;
    }
    return processing;
}
@@ -63,14 +85,21 @@ public class BookService {
}
bookEmbeddingService.deleteBookChunks(id);
// Delete the stored PDF
Path pdfPath = bookStoragePath.resolve(id + ".pdf");
try {
Files.deleteIfExists(pdfPath);
} catch (IOException ex) {
// Non-fatal — log only
}
bookRepository.deleteById(id);
}
private String deriveTitle(String filename) {
// Strip .pdf extension and replace separators with spaces
String name = filename.replaceAll("(?i)\\.pdf$", "");
name = name.replaceAll("[-_]", " ");
// Capitalise first letter
if (!name.isEmpty()) {
name = Character.toUpperCase(name.charAt(0)) + name.substring(1);
}
@@ -0,0 +1,12 @@
package com.aiteacher.book;
/**
 * API representation of a figure belonging to a book.
 *
 * @param figureId     identifier of the figure
 * @param label        label of the figure
 * @param caption      caption of the figure
 * @param figureType   name of the figure's type constant
 * @param page         page the figure is on
 * @param imageUrl     URL under which the figure image is addressed
 * @param sectionId    identifier of the section the figure is assigned to
 * @param sectionTitle title of that section; may be {@code null} when the producer
 *                     does not load it
 */
public record FigureResponse(
String figureId,
String label,
String caption,
String figureType,
int page,
String imageUrl,
String sectionId,
String sectionTitle
) {}