first implementation
This commit is contained in:
@@ -0,0 +1,122 @@
|
||||
package com.aiteacher.book;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import java.time.Instant;
|
||||
import java.util.UUID;
|
||||
|
||||
@Entity
|
||||
@Table(name = "book")
|
||||
public class Book {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.UUID)
|
||||
private UUID id;
|
||||
|
||||
@Column(name = "title", nullable = false, length = 500)
|
||||
private String title;
|
||||
|
||||
@Column(name = "file_name", nullable = false, length = 500)
|
||||
private String fileName;
|
||||
|
||||
@Column(name = "file_size_bytes", nullable = false)
|
||||
private long fileSizeBytes;
|
||||
|
||||
@Column(name = "page_count")
|
||||
private Integer pageCount;
|
||||
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(name = "status", nullable = false, length = 20)
|
||||
private BookStatus status;
|
||||
|
||||
@Column(name = "error_message", columnDefinition = "TEXT")
|
||||
private String errorMessage;
|
||||
|
||||
@Column(name = "uploaded_at", nullable = false)
|
||||
private Instant uploadedAt;
|
||||
|
||||
@Column(name = "processed_at")
|
||||
private Instant processedAt;
|
||||
|
||||
// Constructors
|
||||
|
||||
public Book() {
|
||||
}
|
||||
|
||||
public Book(String title, String fileName, long fileSizeBytes) {
|
||||
this.title = title;
|
||||
this.fileName = fileName;
|
||||
this.fileSizeBytes = fileSizeBytes;
|
||||
this.status = BookStatus.PENDING;
|
||||
this.uploadedAt = Instant.now();
|
||||
}
|
||||
|
||||
// Getters & Setters
|
||||
|
||||
public UUID getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public void setFileName(String fileName) {
|
||||
this.fileName = fileName;
|
||||
}
|
||||
|
||||
public long getFileSizeBytes() {
|
||||
return fileSizeBytes;
|
||||
}
|
||||
|
||||
public void setFileSizeBytes(long fileSizeBytes) {
|
||||
this.fileSizeBytes = fileSizeBytes;
|
||||
}
|
||||
|
||||
public Integer getPageCount() {
|
||||
return pageCount;
|
||||
}
|
||||
|
||||
public void setPageCount(Integer pageCount) {
|
||||
this.pageCount = pageCount;
|
||||
}
|
||||
|
||||
public BookStatus getStatus() {
|
||||
return status;
|
||||
}
|
||||
|
||||
public void setStatus(BookStatus status) {
|
||||
this.status = status;
|
||||
}
|
||||
|
||||
public String getErrorMessage() {
|
||||
return errorMessage;
|
||||
}
|
||||
|
||||
public void setErrorMessage(String errorMessage) {
|
||||
this.errorMessage = errorMessage;
|
||||
}
|
||||
|
||||
public Instant getUploadedAt() {
|
||||
return uploadedAt;
|
||||
}
|
||||
|
||||
public void setUploadedAt(Instant uploadedAt) {
|
||||
this.uploadedAt = uploadedAt;
|
||||
}
|
||||
|
||||
public Instant getProcessedAt() {
|
||||
return processedAt;
|
||||
}
|
||||
|
||||
public void setProcessedAt(Instant processedAt) {
|
||||
this.processedAt = processedAt;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
package com.aiteacher.book;
|
||||
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/books")
|
||||
public class BookController {
|
||||
|
||||
private final BookService bookService;
|
||||
|
||||
public BookController(BookService bookService) {
|
||||
this.bookService = bookService;
|
||||
}
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data")
|
||||
public ResponseEntity<?> upload(@RequestParam("file") MultipartFile file) throws IOException {
|
||||
Book book = bookService.upload(file);
|
||||
return ResponseEntity.status(HttpStatus.ACCEPTED).body(toSummaryResponse(book));
|
||||
}
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<List<Map<String, Object>>> list() {
|
||||
List<Map<String, Object>> books = bookService.listAll().stream()
|
||||
.map(this::toFullResponse)
|
||||
.toList();
|
||||
return ResponseEntity.ok(books);
|
||||
}
|
||||
|
||||
@GetMapping("/{id}")
|
||||
public ResponseEntity<Map<String, Object>> get(@PathVariable UUID id) {
|
||||
Book book = bookService.getById(id);
|
||||
return ResponseEntity.ok(toFullResponse(book));
|
||||
}
|
||||
|
||||
@DeleteMapping("/{id}")
|
||||
public ResponseEntity<Void> delete(@PathVariable UUID id) {
|
||||
bookService.delete(id);
|
||||
return ResponseEntity.noContent().build();
|
||||
}
|
||||
|
||||
private Map<String, Object> toSummaryResponse(Book book) {
|
||||
return Map.of(
|
||||
"id", book.getId(),
|
||||
"title", book.getTitle(),
|
||||
"fileName", book.getFileName(),
|
||||
"status", book.getStatus().name(),
|
||||
"uploadedAt", book.getUploadedAt()
|
||||
);
|
||||
}
|
||||
|
||||
private Map<String, Object> toFullResponse(Book book) {
|
||||
var map = new java.util.LinkedHashMap<String, Object>();
|
||||
map.put("id", book.getId());
|
||||
map.put("title", book.getTitle());
|
||||
map.put("fileName", book.getFileName());
|
||||
map.put("fileSizeBytes", book.getFileSizeBytes());
|
||||
map.put("pageCount", book.getPageCount());
|
||||
map.put("status", book.getStatus().name());
|
||||
map.put("uploadedAt", book.getUploadedAt());
|
||||
map.put("processedAt", book.getProcessedAt());
|
||||
if (book.getErrorMessage() != null) {
|
||||
map.put("errorMessage", book.getErrorMessage());
|
||||
}
|
||||
return map;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
package com.aiteacher.book;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
|
||||
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
|
||||
import org.springframework.ai.vectorstore.VectorStore;
|
||||
import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
|
||||
import org.springframework.core.io.FileSystemResource;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Service
|
||||
public class BookEmbeddingService {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(BookEmbeddingService.class);
|
||||
|
||||
// Pattern to detect diagram/figure captions
|
||||
private static final Pattern CAPTION_PATTERN =
|
||||
Pattern.compile("^(Figure|Fig\\.|Table|Diagram)\\s+[\\d.]+", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
private final VectorStore vectorStore;
|
||||
private final BookRepository bookRepository;
|
||||
|
||||
public BookEmbeddingService(VectorStore vectorStore, BookRepository bookRepository) {
|
||||
this.vectorStore = vectorStore;
|
||||
this.bookRepository = bookRepository;
|
||||
}
|
||||
|
||||
@Async
|
||||
public void embedBook(UUID bookId, String bookTitle, Path pdfPath) {
|
||||
log.info("Starting embedding for book {} ({})", bookId, bookTitle);
|
||||
|
||||
Book book = bookRepository.findById(bookId).orElse(null);
|
||||
if (book == null) {
|
||||
log.warn("Book {} not found, skipping embedding", bookId);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
book.setStatus(BookStatus.PROCESSING);
|
||||
bookRepository.save(book);
|
||||
|
||||
PagePdfDocumentReader reader = new PagePdfDocumentReader(
|
||||
new FileSystemResource(pdfPath.toFile()),
|
||||
PdfDocumentReaderConfig.builder()
|
||||
.withPagesPerDocument(1)
|
||||
.build()
|
||||
);
|
||||
|
||||
List<Document> pages = reader.get();
|
||||
int pageCount = pages.size();
|
||||
|
||||
// Enrich metadata and tag diagram captions
|
||||
List<Document> enriched = pages.stream()
|
||||
.map(doc -> enrichDocument(doc, bookId.toString(), bookTitle))
|
||||
.toList();
|
||||
|
||||
vectorStore.add(enriched);
|
||||
|
||||
book.setStatus(BookStatus.READY);
|
||||
book.setPageCount(pageCount);
|
||||
book.setProcessedAt(java.time.Instant.now());
|
||||
bookRepository.save(book);
|
||||
|
||||
log.info("Finished embedding book {} — {} pages", bookId, pageCount);
|
||||
|
||||
} catch (Exception ex) {
|
||||
log.error("Failed to embed book {}", bookId, ex);
|
||||
book.setStatus(BookStatus.FAILED);
|
||||
book.setErrorMessage(truncate(ex.getMessage(), 1000));
|
||||
bookRepository.save(book);
|
||||
}
|
||||
}
|
||||
|
||||
private Document enrichDocument(Document doc, String bookId, String bookTitle) {
|
||||
String content = doc.getText();
|
||||
String chunkType = detectChunkType(content);
|
||||
|
||||
doc.getMetadata().put("book_id", bookId);
|
||||
doc.getMetadata().put("book_title", bookTitle);
|
||||
doc.getMetadata().put("chunk_type", chunkType);
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
private String detectChunkType(String content) {
|
||||
if (content != null) {
|
||||
for (String line : content.split("\\r?\\n")) {
|
||||
if (CAPTION_PATTERN.matcher(line.trim()).find()) {
|
||||
return "diagram";
|
||||
}
|
||||
}
|
||||
}
|
||||
return "text";
|
||||
}
|
||||
|
||||
public void deleteBookChunks(UUID bookId) {
|
||||
log.info("Deleting vector chunks for book {}", bookId);
|
||||
try {
|
||||
FilterExpressionBuilder b = new FilterExpressionBuilder();
|
||||
vectorStore.delete(b.eq("book_id", bookId.toString()).build());
|
||||
} catch (Exception ex) {
|
||||
log.warn("Could not delete vector chunks for book {}: {}", bookId, ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private String truncate(String message, int maxLength) {
|
||||
if (message == null) return null;
|
||||
return message.length() <= maxLength ? message : message.substring(0, maxLength);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.aiteacher.book;
|
||||
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
@Repository
|
||||
public interface BookRepository extends JpaRepository<Book, UUID> {
|
||||
|
||||
List<Book> findByStatus(BookStatus status);
|
||||
|
||||
boolean existsByStatus(BookStatus status);
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
package com.aiteacher.book;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.UUID;
|
||||
|
||||
@Service
|
||||
public class BookService {
|
||||
|
||||
private final BookRepository bookRepository;
|
||||
private final BookEmbeddingService bookEmbeddingService;
|
||||
|
||||
public BookService(BookRepository bookRepository, BookEmbeddingService bookEmbeddingService) {
|
||||
this.bookRepository = bookRepository;
|
||||
this.bookEmbeddingService = bookEmbeddingService;
|
||||
}
|
||||
|
||||
public Book upload(MultipartFile file) throws IOException {
|
||||
String originalFilename = file.getOriginalFilename();
|
||||
if (originalFilename == null || !originalFilename.toLowerCase().endsWith(".pdf")) {
|
||||
throw new IllegalArgumentException("Only PDF files are accepted.");
|
||||
}
|
||||
|
||||
String title = deriveTitle(originalFilename);
|
||||
|
||||
Book book = new Book(title, originalFilename, file.getSize());
|
||||
book = bookRepository.save(book);
|
||||
|
||||
// Write to a temp file so the async task can read it
|
||||
Path tempFile = Files.createTempFile("aiteacher-", "-" + book.getId() + ".pdf");
|
||||
file.transferTo(tempFile.toFile());
|
||||
|
||||
UUID bookId = book.getId();
|
||||
Path pdfPath = tempFile;
|
||||
String bookTitle = title;
|
||||
|
||||
bookEmbeddingService.embedBook(bookId, bookTitle, pdfPath);
|
||||
|
||||
return book;
|
||||
}
|
||||
|
||||
public List<Book> listAll() {
|
||||
return bookRepository.findAll();
|
||||
}
|
||||
|
||||
public Book getById(UUID id) {
|
||||
return bookRepository.findById(id)
|
||||
.orElseThrow(() -> new NoSuchElementException("Book not found."));
|
||||
}
|
||||
|
||||
public void delete(UUID id) {
|
||||
Book book = bookRepository.findById(id)
|
||||
.orElseThrow(() -> new NoSuchElementException("Book not found."));
|
||||
|
||||
if (book.getStatus() == BookStatus.PROCESSING) {
|
||||
throw new IllegalStateException("Cannot delete a book that is currently being processed.");
|
||||
}
|
||||
|
||||
bookEmbeddingService.deleteBookChunks(id);
|
||||
bookRepository.deleteById(id);
|
||||
}
|
||||
|
||||
private String deriveTitle(String filename) {
|
||||
// Strip .pdf extension and replace separators with spaces
|
||||
String name = filename.replaceAll("(?i)\\.pdf$", "");
|
||||
name = name.replaceAll("[-_]", " ");
|
||||
// Capitalise first letter
|
||||
if (!name.isEmpty()) {
|
||||
name = Character.toUpperCase(name.charAt(0)) + name.substring(1);
|
||||
}
|
||||
return name;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
package com.aiteacher.book;
|
||||
|
||||
/**
 * Processing state of a book. Constant names are what gets persisted, so they
 * must not be renamed casually.
 */
public enum BookStatus {

    /** Initial state assigned when a book is created from an upload. */
    PENDING,

    /** Set while the book's PDF is being read and embedded. */
    PROCESSING,

    /** Set once embedding has finished successfully. */
    READY,

    /** Set when embedding ended with an error. */
    FAILED;
}
|
||||
@@ -0,0 +1,8 @@
|
||||
package com.aiteacher.book;
|
||||
|
||||
/**
 * Unchecked exception that carries only a detail message.
 *
 * <p>NOTE(review): presumably raised when no book is available to answer
 * from — it is not thrown anywhere in the visible code, so confirm against
 * its callers.
 */
public class NoKnowledgeSourceException extends RuntimeException {

    /**
     * @param message detail message, retrievable through {@link #getMessage()}
     */
    public NoKnowledgeSourceException(final String message) {
        super(message);
    }
}
|
||||
Reference in New Issue
Block a user