first implementation

This commit is contained in:
Adrien
2026-03-31 20:58:47 +02:00
parent dc0bcab36e
commit 618e28b354
1878 changed files with 1381732 additions and 5 deletions
@@ -0,0 +1,122 @@
package com.aiteacher.book;
import jakarta.persistence.*;
import java.time.Instant;
import java.util.UUID;
/**
 * JPA entity mapped to the {@code book} table.
 *
 * <p>Holds the descriptive data of an uploaded file (title, file name, size) together with
 * its processing state: a {@link BookStatus}, an optional page count, an optional error
 * message and the upload / processing timestamps.
 */
@Entity
@Table(name = "book")
public class Book {

    /** Primary key; generated with the UUID strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.UUID)
    private UUID id;

    /** Display title; required, at most 500 characters. */
    @Column(name = "title", nullable = false, length = 500)
    private String title;

    /** File name of the upload; required, at most 500 characters. */
    @Column(name = "file_name", nullable = false, length = 500)
    private String fileName;

    /** Size of the uploaded file in bytes; required. */
    @Column(name = "file_size_bytes", nullable = false)
    private long fileSizeBytes;

    /** Number of pages; nullable, so it may be absent. */
    @Column(name = "page_count")
    private Integer pageCount;

    /** Processing state, persisted as the enum constant's name. */
    @Enumerated(EnumType.STRING)
    @Column(name = "status", nullable = false, length = 20)
    private BookStatus status;

    /** Optional error description, stored in a TEXT column. */
    @Column(name = "error_message", columnDefinition = "TEXT")
    private String errorMessage;

    /** Upload timestamp; required. */
    @Column(name = "uploaded_at", nullable = false)
    private Instant uploadedAt;

    /** Processing-completed timestamp; nullable. */
    @Column(name = "processed_at")
    private Instant processedAt;

    // ------------------------------------------------------------------
    // Construction
    // ------------------------------------------------------------------

    /** No-argument constructor; leaves every field at its Java default. */
    public Book() {
    }

    /**
     * Creates a book in the {@link BookStatus#PENDING} state whose upload timestamp is the
     * current instant.
     *
     * @param title         display title
     * @param fileName      file name of the upload
     * @param fileSizeBytes size of the upload in bytes
     */
    public Book(String title, String fileName, long fileSizeBytes) {
        this.title = title;
        this.fileName = fileName;
        this.fileSizeBytes = fileSizeBytes;
        this.status = BookStatus.PENDING;
        this.uploadedAt = Instant.now();
    }

    // ------------------------------------------------------------------
    // Accessors (the id is read-only: it has no setter)
    // ------------------------------------------------------------------

    /** @return the identifier, or {@code null} if none has been assigned yet */
    public UUID getId() {
        return id;
    }

    /** @return the display title */
    public String getTitle() {
        return title;
    }

    /** @param title the display title to set */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the file name of the upload */
    public String getFileName() {
        return fileName;
    }

    /** @param fileName the file name to set */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /** @return the file size in bytes */
    public long getFileSizeBytes() {
        return fileSizeBytes;
    }

    /** @param fileSizeBytes the file size in bytes to set */
    public void setFileSizeBytes(long fileSizeBytes) {
        this.fileSizeBytes = fileSizeBytes;
    }

    /** @return the page count, or {@code null} when not set */
    public Integer getPageCount() {
        return pageCount;
    }

    /** @param pageCount the page count to set; may be {@code null} */
    public void setPageCount(Integer pageCount) {
        this.pageCount = pageCount;
    }

    /** @return the current processing state */
    public BookStatus getStatus() {
        return status;
    }

    /** @param status the processing state to set */
    public void setStatus(BookStatus status) {
        this.status = status;
    }

    /** @return the error description, or {@code null} when none is recorded */
    public String getErrorMessage() {
        return errorMessage;
    }

    /** @param errorMessage the error description to set; may be {@code null} */
    public void setErrorMessage(String errorMessage) {
        this.errorMessage = errorMessage;
    }

    /** @return the upload timestamp */
    public Instant getUploadedAt() {
        return uploadedAt;
    }

    /** @param uploadedAt the upload timestamp to set */
    public void setUploadedAt(Instant uploadedAt) {
        this.uploadedAt = uploadedAt;
    }

    /** @return the processing-completed timestamp, or {@code null} when not set */
    public Instant getProcessedAt() {
        return processedAt;
    }

    /** @param processedAt the processing-completed timestamp to set; may be {@code null} */
    public void setProcessedAt(Instant processedAt) {
        this.processedAt = processedAt;
    }
}
@@ -0,0 +1,75 @@
package com.aiteacher.book;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.UUID;
/**
 * REST endpoints for books, rooted at {@code /api/v1/books}.
 *
 * <p>All work is delegated to {@link BookService}; this class only converts {@link Book}
 * entities into map-based response bodies. Exceptions thrown by the service are not caught
 * here and propagate to the framework.
 * NOTE(review): confirm that a global exception handler maps them to suitable HTTP statuses.
 */
@RestController
@RequestMapping("/api/v1/books")
public class BookController {

    private final BookService bookService;

    /**
     * @param bookService service that performs the upload, lookup and delete operations
     */
    public BookController(BookService bookService) {
        this.bookService = bookService;
    }

    /**
     * Accepts a multipart upload and hands it to {@link BookService#upload(MultipartFile)}.
     *
     * @param file the uploaded file, taken from the {@code file} request part
     * @return {@code 202 Accepted} with a summary of the stored book
     * @throws IOException if the service fails while handling the uploaded file
     */
    @PostMapping(consumes = "multipart/form-data")
    public ResponseEntity<Map<String, Object>> upload(@RequestParam("file") MultipartFile file)
            throws IOException {
        Book book = bookService.upload(file);
        return ResponseEntity.status(HttpStatus.ACCEPTED).body(toSummaryResponse(book));
    }

    /**
     * Lists every book known to the service.
     *
     * @return {@code 200 OK} with one full response map per book
     */
    @GetMapping
    public ResponseEntity<List<Map<String, Object>>> list() {
        List<Map<String, Object>> books = bookService.listAll().stream()
                .map(this::toFullResponse)
                .toList();
        return ResponseEntity.ok(books);
    }

    /**
     * Fetches a single book.
     *
     * @param id identifier of the book
     * @return {@code 200 OK} with the full response map of the book
     */
    @GetMapping("/{id}")
    public ResponseEntity<Map<String, Object>> get(@PathVariable UUID id) {
        Book book = bookService.getById(id);
        return ResponseEntity.ok(toFullResponse(book));
    }

    /**
     * Deletes a single book via {@link BookService#delete(UUID)}.
     *
     * @param id identifier of the book
     * @return {@code 204 No Content}
     */
    @DeleteMapping("/{id}")
    public ResponseEntity<Void> delete(@PathVariable UUID id) {
        bookService.delete(id);
        return ResponseEntity.noContent().build();
    }

    /**
     * Builds the short body returned right after an upload.
     *
     * <p>An insertion-ordered map is used (as in {@link #toFullResponse(Book)}) so the keys
     * are always emitted in the same order; {@code Map.of} leaves iteration order unspecified.
     */
    private Map<String, Object> toSummaryResponse(Book book) {
        Map<String, Object> map = new java.util.LinkedHashMap<>();
        map.put("id", book.getId());
        map.put("title", book.getTitle());
        map.put("fileName", book.getFileName());
        map.put("status", book.getStatus().name());
        map.put("uploadedAt", book.getUploadedAt());
        return map;
    }

    /**
     * Builds the detailed body used by the list and get endpoints.
     *
     * <p>{@code pageCount} and {@code processedAt} are always present (possibly {@code null});
     * {@code errorMessage} is included only when the book has one.
     */
    private Map<String, Object> toFullResponse(Book book) {
        Map<String, Object> map = new java.util.LinkedHashMap<>();
        map.put("id", book.getId());
        map.put("title", book.getTitle());
        map.put("fileName", book.getFileName());
        map.put("fileSizeBytes", book.getFileSizeBytes());
        map.put("pageCount", book.getPageCount());
        map.put("status", book.getStatus().name());
        map.put("uploadedAt", book.getUploadedAt());
        map.put("processedAt", book.getProcessedAt());
        if (book.getErrorMessage() != null) {
            map.put("errorMessage", book.getErrorMessage());
        }
        return map;
    }
}
@@ -0,0 +1,118 @@
package com.aiteacher.book;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
import org.springframework.core.io.FileSystemResource;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.nio.file.Path;
import java.util.List;
import java.util.UUID;
import java.util.regex.Pattern;
/**
 * Reads an uploaded PDF page by page, tags each page with book metadata and stores the
 * resulting documents in the {@link VectorStore}; also removes a book's stored chunks.
 */
@Service
public class BookEmbeddingService {

    private static final Logger log = LoggerFactory.getLogger(BookEmbeddingService.class);

    /** Matches a line that starts like a figure/table caption, e.g. "Figure 3.2" or "Fig. 4". */
    private static final Pattern CAPTION_PATTERN =
            Pattern.compile("^(Figure|Fig\\.|Table|Diagram)\\s+[\\d.]+", Pattern.CASE_INSENSITIVE);

    /** Upper bound on the error text stored on a failed book. */
    private static final int MAX_ERROR_MESSAGE_LENGTH = 1000;

    private final VectorStore vectorStore;
    private final BookRepository bookRepository;

    /**
     * @param vectorStore    store that receives the enriched page documents
     * @param bookRepository repository used to load the book and persist its status
     */
    public BookEmbeddingService(VectorStore vectorStore, BookRepository bookRepository) {
        this.vectorStore = vectorStore;
        this.bookRepository = bookRepository;
    }

    /**
     * Embeds one book and records the outcome on its {@link Book} row.
     *
     * <p>The book is marked {@code PROCESSING}, then {@code READY} (with page count and
     * processed timestamp) on success or {@code FAILED} (with an error message) on any
     * exception raised while reading or storing the pages. If no book with the given id
     * exists, nothing is embedded.
     *
     * <p>This method takes ownership of {@code pdfPath}: the file is deleted once the method
     * is done with it, whatever the outcome, so that files created for the hand-off do not
     * accumulate on disk.
     *
     * @param bookId    identifier of the book to embed
     * @param bookTitle title written into each document's metadata
     * @param pdfPath   location of the PDF to read; deleted before this method returns
     */
    @Async
    public void embedBook(UUID bookId, String bookTitle, Path pdfPath) {
        log.info("Starting embedding for book {} ({})", bookId, bookTitle);
        try {
            Book book = bookRepository.findById(bookId).orElse(null);
            if (book == null) {
                log.warn("Book {} not found, skipping embedding", bookId);
                return;
            }
            processBook(book, bookId, bookTitle, pdfPath);
        } finally {
            // Runs on every path (success, failure, book missing) so the file never leaks.
            deleteSourceFile(bookId, pdfPath);
        }
    }

    /** Reads, enriches and stores the pages, persisting the resulting status on the book. */
    private void processBook(Book book, UUID bookId, String bookTitle, Path pdfPath) {
        try {
            book.setStatus(BookStatus.PROCESSING);
            bookRepository.save(book);

            PagePdfDocumentReader reader = new PagePdfDocumentReader(
                    new FileSystemResource(pdfPath.toFile()),
                    PdfDocumentReaderConfig.builder()
                            .withPagesPerDocument(1)
                            .build()
            );
            List<Document> pages = reader.get();
            int pageCount = pages.size();

            // Enrich metadata and tag diagram captions
            List<Document> enriched = pages.stream()
                    .map(doc -> enrichDocument(doc, bookId.toString(), bookTitle))
                    .toList();
            vectorStore.add(enriched);

            book.setStatus(BookStatus.READY);
            book.setPageCount(pageCount);
            book.setProcessedAt(java.time.Instant.now());
            bookRepository.save(book);
            log.info("Finished embedding book {} — {} pages", bookId, pageCount);
        } catch (Exception ex) {
            log.error("Failed to embed book {}", bookId, ex);
            book.setStatus(BookStatus.FAILED);
            book.setErrorMessage(truncate(describe(ex), MAX_ERROR_MESSAGE_LENGTH));
            bookRepository.save(book);
        }
    }

    /** Best-effort removal of the source PDF; a failure is logged and otherwise ignored. */
    private void deleteSourceFile(UUID bookId, Path pdfPath) {
        if (pdfPath == null) {
            return;
        }
        try {
            java.nio.file.Files.deleteIfExists(pdfPath);
        } catch (java.io.IOException | RuntimeException ex) {
            log.warn("Could not delete source file {} for book {}: {}",
                    pdfPath, bookId, ex.getMessage());
        }
    }

    /** Returns the exception message, or its {@code toString()} when it has no message. */
    private String describe(Exception ex) {
        String message = ex.getMessage();
        return message != null ? message : ex.toString();
    }

    /** Adds {@code book_id}, {@code book_title} and {@code chunk_type} to the document metadata. */
    private Document enrichDocument(Document doc, String bookId, String bookTitle) {
        String content = doc.getText();
        String chunkType = detectChunkType(content);
        doc.getMetadata().put("book_id", bookId);
        doc.getMetadata().put("book_title", bookTitle);
        doc.getMetadata().put("chunk_type", chunkType);
        return doc;
    }

    /**
     * Classifies page text: {@code "diagram"} if any trimmed line starts with a caption
     * marker, otherwise {@code "text"} (also for {@code null} content).
     */
    private String detectChunkType(String content) {
        if (content != null) {
            for (String line : content.split("\\r?\\n")) {
                if (CAPTION_PATTERN.matcher(line.trim()).find()) {
                    return "diagram";
                }
            }
        }
        return "text";
    }

    /**
     * Deletes every stored chunk whose {@code book_id} metadata equals the given id.
     *
     * <p>Deliberately best-effort: a failure is logged as a warning and not rethrown.
     *
     * @param bookId identifier of the book whose chunks should be removed
     */
    public void deleteBookChunks(UUID bookId) {
        log.info("Deleting vector chunks for book {}", bookId);
        try {
            FilterExpressionBuilder b = new FilterExpressionBuilder();
            vectorStore.delete(b.eq("book_id", bookId.toString()).build());
        } catch (Exception ex) {
            log.warn("Could not delete vector chunks for book {}: {}", bookId, ex.getMessage());
        }
    }

    /** Cuts {@code message} to at most {@code maxLength} characters; {@code null} stays {@code null}. */
    private String truncate(String message, int maxLength) {
        if (message == null) {
            return null;
        }
        return message.length() <= maxLength ? message : message.substring(0, maxLength);
    }
}
@@ -0,0 +1,15 @@
package com.aiteacher.book;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.UUID;
/**
 * Spring Data JPA repository for {@link Book} entities keyed by {@link UUID}.
 *
 * <p>Besides the operations inherited from {@link JpaRepository}, it declares two
 * status-based finder methods.
 */
@Repository
public interface BookRepository extends JpaRepository<Book, UUID> {

    /**
     * @param status the status to look for
     * @return {@code true} if at least one book has the given status
     */
    boolean existsByStatus(BookStatus status);

    /**
     * @param status the status to look for
     * @return all books that have the given status
     */
    List<Book> findByStatus(BookStatus status);
}
@@ -0,0 +1,79 @@
package com.aiteacher.book;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.UUID;
/**
 * Application service for books: accepts PDF uploads, hands them to
 * {@link BookEmbeddingService} for embedding, and offers lookup and deletion.
 */
@Service
public class BookService {

    private final BookRepository bookRepository;
    private final BookEmbeddingService bookEmbeddingService;

    /**
     * @param bookRepository       persistence for {@link Book} rows
     * @param bookEmbeddingService service that embeds uploaded PDFs and removes their chunks
     */
    public BookService(BookRepository bookRepository, BookEmbeddingService bookEmbeddingService) {
        this.bookRepository = bookRepository;
        this.bookEmbeddingService = bookEmbeddingService;
    }

    /**
     * Stores a new {@link Book} for the uploaded PDF and passes a temp-file copy of it to
     * {@link BookEmbeddingService#embedBook}.
     *
     * <p>If copying the upload or handing it off fails, the temp file and the freshly saved
     * book row are removed again before the exception is rethrown, so a failed request does
     * not leave a book stuck in {@code PENDING} or a stray file on disk.
     *
     * @param file the uploaded file; its original file name must end in {@code .pdf}
     *             (case-insensitive)
     * @return the saved book
     * @throws IllegalArgumentException if the file name is missing or does not end in {@code .pdf}
     * @throws IOException              if the temp file cannot be created or written
     */
    public Book upload(MultipartFile file) throws IOException {
        String originalFilename = file.getOriginalFilename();
        // Locale.ROOT keeps the extension check independent of the JVM's default locale.
        if (originalFilename == null
                || !originalFilename.toLowerCase(java.util.Locale.ROOT).endsWith(".pdf")) {
            throw new IllegalArgumentException("Only PDF files are accepted.");
        }

        String title = deriveTitle(originalFilename);
        Book book = bookRepository.save(new Book(title, originalFilename, file.getSize()));

        Path tempFile = null;
        try {
            // Write to a temp file so the async task can read it
            tempFile = Files.createTempFile("aiteacher-", "-" + book.getId() + ".pdf");
            file.transferTo(tempFile.toFile());
            bookEmbeddingService.embedBook(book.getId(), title, tempFile);
        } catch (IOException | RuntimeException ex) {
            discardFailedUpload(book, tempFile, ex);
            throw ex;
        }
        return book;
    }

    /**
     * Best-effort rollback of a failed upload: removes the temp file (if one was created) and
     * the book row. Cleanup failures are attached to {@code cause} as suppressed exceptions
     * so they never hide the original error.
     */
    private void discardFailedUpload(Book book, Path tempFile, Exception cause) {
        if (tempFile != null) {
            try {
                Files.deleteIfExists(tempFile);
            } catch (IOException | RuntimeException cleanupFailure) {
                cause.addSuppressed(cleanupFailure);
            }
        }
        try {
            bookRepository.delete(book);
        } catch (RuntimeException cleanupFailure) {
            cause.addSuppressed(cleanupFailure);
        }
    }

    /** @return every stored book */
    public List<Book> listAll() {
        return bookRepository.findAll();
    }

    /**
     * @param id identifier of the book
     * @return the book with the given id
     * @throws NoSuchElementException if no such book exists
     */
    public Book getById(UUID id) {
        return bookRepository.findById(id)
                .orElseThrow(() -> new NoSuchElementException("Book not found."));
    }

    /**
     * Deletes a book and its vector chunks.
     *
     * @param id identifier of the book
     * @throws NoSuchElementException if no such book exists
     * @throws IllegalStateException  if the book is currently in {@code PROCESSING}
     */
    public void delete(UUID id) {
        Book book = getById(id);
        if (book.getStatus() == BookStatus.PROCESSING) {
            throw new IllegalStateException("Cannot delete a book that is currently being processed.");
        }
        bookEmbeddingService.deleteBookChunks(id);
        bookRepository.deleteById(id);
    }

    /**
     * Turns a file name into a display title: drops any directory prefix and the
     * {@code .pdf} extension, replaces {@code -} and {@code _} with spaces and upper-cases
     * the first character.
     */
    private String deriveTitle(String filename) {
        // Some clients send a full path as the original file name; keep only the last segment.
        int lastSeparator = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
        String name = filename.substring(lastSeparator + 1);

        // Strip .pdf extension and replace separators with spaces
        name = name.replaceAll("(?i)\\.pdf$", "");
        name = name.replace('-', ' ').replace('_', ' ');

        // Capitalise first letter
        if (!name.isEmpty()) {
            name = Character.toUpperCase(name.charAt(0)) + name.substring(1);
        }
        return name;
    }
}
@@ -0,0 +1,8 @@
package com.aiteacher.book;
/**
 * Processing states of a book.
 *
 * <p>The declaration order is kept as is: {@code PENDING}, {@code PROCESSING},
 * {@code READY}, {@code FAILED}.
 */
public enum BookStatus {

    /** Initial state assigned when a book is created from an upload. */
    PENDING,

    /** Set while the book's pages are being read and embedded. */
    PROCESSING,

    /** Set once embedding has completed successfully. */
    READY,

    /** Set when embedding ended with an error. */
    FAILED
}
@@ -0,0 +1,8 @@
package com.aiteacher.book;
/**
 * Unchecked exception that carries only a detail message.
 *
 * <p>NOTE(review): judging by the name, this presumably signals that no book / knowledge
 * source is available — confirm against the throw sites, which are not part of this file.
 */
public class NoKnowledgeSourceException extends RuntimeException {

    /**
     * @param message detail message, later available through {@link #getMessage()}
     */
    public NoKnowledgeSourceException(String message) {
        super(message);
    }
}