Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e5d53b4e80 | |||
| 5c641f4bcc |
@@ -151,6 +151,8 @@ npm run dev
|
|||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
||||||
|
#### Backend
|
||||||
|
|
||||||
| Variable | Required | Description |
|
| Variable | Required | Description |
|
||||||
|----------|----------|-------------|
|
|----------|----------|-------------|
|
||||||
| `OPENAI_API_KEY` | Yes | OpenAI API key for embeddings and chat |
|
| `OPENAI_API_KEY` | Yes | OpenAI API key for embeddings and chat |
|
||||||
@@ -159,3 +161,14 @@ npm run dev
|
|||||||
| `DB_USERNAME` | Yes | Database username |
|
| `DB_USERNAME` | Yes | Database username |
|
||||||
| `DB_PASSWORD` | Yes | Database password |
|
| `DB_PASSWORD` | Yes | Database password |
|
||||||
| `FIGURE_STORAGE_PATH` | No | Base path for uploaded PDFs and extracted figures (default: `./uploads`) |
|
| `FIGURE_STORAGE_PATH` | No | Base path for uploaded PDFs and extracted figures (default: `./uploads`) |
|
||||||
|
| `UPLOAD_ENABLED` | No | Set to `false` to disable the book upload endpoint (default: `true`) |
|
||||||
|
| `DELETE_ENABLED` | No | Set to `false` to disable the book delete endpoint (default: `true`) |
|
||||||
|
|
||||||
|
#### Frontend
|
||||||
|
|
||||||
|
| Variable | Required | Description |
|
||||||
|
|----------|----------|-------------|
|
||||||
|
| `VITE_API_URL` | No | Backend API base URL (default: `/api/v1`) |
|
||||||
|
| `VITE_APP_PASSWORD` | Yes | Shared password for HTTP Basic auth (must match `APP_PASSWORD`) |
|
||||||
|
| `VITE_UPLOAD_ENABLED` | No | Set to `false` to hide the upload UI (default: `true`) |
|
||||||
|
| `VITE_DELETE_ENABLED` | No | Set to `false` to hide the delete button (default: `true`) |
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package com.aiteacher.book;
|
|||||||
import com.aiteacher.document.FigureEntity;
|
import com.aiteacher.document.FigureEntity;
|
||||||
import com.aiteacher.document.FigureRepository;
|
import com.aiteacher.document.FigureRepository;
|
||||||
import com.aiteacher.document.MarkdownStorageService;
|
import com.aiteacher.document.MarkdownStorageService;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.http.HttpStatus;
|
import org.springframework.http.HttpStatus;
|
||||||
import org.springframework.http.MediaType;
|
import org.springframework.http.MediaType;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
@@ -22,6 +23,12 @@ public class BookController {
|
|||||||
private final FigureRepository figureRepository;
|
private final FigureRepository figureRepository;
|
||||||
private final MarkdownStorageService markdownStorageService;
|
private final MarkdownStorageService markdownStorageService;
|
||||||
|
|
||||||
|
@Value("${app.features.upload-enabled:true}")
|
||||||
|
private boolean uploadEnabled;
|
||||||
|
|
||||||
|
@Value("${app.features.delete-enabled:true}")
|
||||||
|
private boolean deleteEnabled;
|
||||||
|
|
||||||
public BookController(BookService bookService, FigureRepository figureRepository,
|
public BookController(BookService bookService, FigureRepository figureRepository,
|
||||||
MarkdownStorageService markdownStorageService) {
|
MarkdownStorageService markdownStorageService) {
|
||||||
this.bookService = bookService;
|
this.bookService = bookService;
|
||||||
@@ -31,6 +38,7 @@ public class BookController {
|
|||||||
|
|
||||||
@PostMapping(consumes = "multipart/form-data")
|
@PostMapping(consumes = "multipart/form-data")
|
||||||
public ResponseEntity<?> upload(@RequestParam("file") MultipartFile file) throws IOException {
|
public ResponseEntity<?> upload(@RequestParam("file") MultipartFile file) throws IOException {
|
||||||
|
if (!uploadEnabled) return ResponseEntity.status(HttpStatus.METHOD_NOT_ALLOWED).build();
|
||||||
Book book = bookService.upload(file);
|
Book book = bookService.upload(file);
|
||||||
return ResponseEntity.status(HttpStatus.ACCEPTED).body(toSummaryResponse(book));
|
return ResponseEntity.status(HttpStatus.ACCEPTED).body(toSummaryResponse(book));
|
||||||
}
|
}
|
||||||
@@ -51,6 +59,7 @@ public class BookController {
|
|||||||
|
|
||||||
@DeleteMapping("/{id}")
|
@DeleteMapping("/{id}")
|
||||||
public ResponseEntity<Void> delete(@PathVariable UUID id) {
|
public ResponseEntity<Void> delete(@PathVariable UUID id) {
|
||||||
|
if (!deleteEnabled) return ResponseEntity.status(HttpStatus.METHOD_NOT_ALLOWED).build();
|
||||||
bookService.delete(id);
|
bookService.delete(id);
|
||||||
return ResponseEntity.noContent().build();
|
return ResponseEntity.noContent().build();
|
||||||
}
|
}
|
||||||
@@ -64,9 +73,9 @@ public class BookController {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
@GetMapping(value = "/{id}/pages/{pageNumber}/markdown", produces = MediaType.TEXT_PLAIN_VALUE)
|
@GetMapping(value = "/{id}/pages/{pageNumber}/html", produces = MediaType.TEXT_HTML_VALUE)
|
||||||
public ResponseEntity<String> getPageMarkdown(@PathVariable UUID id,
|
public ResponseEntity<String> getPageHtml(@PathVariable UUID id,
|
||||||
@PathVariable int pageNumber) {
|
@PathVariable int pageNumber) {
|
||||||
bookService.getById(id); // 404 if not found
|
bookService.getById(id); // 404 if not found
|
||||||
try {
|
try {
|
||||||
return ResponseEntity.ok(markdownStorageService.getText(id, pageNumber));
|
return ResponseEntity.ok(markdownStorageService.getText(id, pageNumber));
|
||||||
|
|||||||
@@ -3,8 +3,6 @@ package com.aiteacher.book;
|
|||||||
import com.aiteacher.document.*;
|
import com.aiteacher.document.*;
|
||||||
import com.aiteacher.figure.FigureStorageService;
|
import com.aiteacher.figure.FigureStorageService;
|
||||||
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.ai.document.Document;
|
import org.springframework.ai.document.Document;
|
||||||
@@ -38,15 +36,15 @@ public class BookEmbeddingService {
|
|||||||
private final FigureStorageService figureStorageService;
|
private final FigureStorageService figureStorageService;
|
||||||
private final MarkdownStorageService markdownStorageService;
|
private final MarkdownStorageService markdownStorageService;
|
||||||
|
|
||||||
private static final Pattern MARKER_PLACEHOLDER =
|
|
||||||
Pattern.compile("!\\[([^\\]]*)\\]\\(marker://([^)]+)\\)");
|
|
||||||
|
|
||||||
@Value("${app.embedding.batch-size:50}")
|
@Value("${app.embedding.batch-size:50}")
|
||||||
private int embeddingBatchSize;
|
private int embeddingBatchSize;
|
||||||
|
|
||||||
@Value("${app.embedding.batch-delay-ms:1000}")
|
@Value("${app.embedding.batch-delay-ms:1000}")
|
||||||
private long embeddingBatchDelayMs;
|
private long embeddingBatchDelayMs;
|
||||||
|
|
||||||
|
@Value("${app.embedding.skip-embedding:false}")
|
||||||
|
private boolean skipEmbedding;
|
||||||
|
|
||||||
public BookEmbeddingService(
|
public BookEmbeddingService(
|
||||||
VectorStore vectorStore,
|
VectorStore vectorStore,
|
||||||
BookRepository bookRepository,
|
BookRepository bookRepository,
|
||||||
@@ -94,8 +92,10 @@ public class BookEmbeddingService {
|
|||||||
ChapterEntity chapter = new ChapterEntity(chapterId, bookId, 1, bookTitle, 1);
|
ChapterEntity chapter = new ChapterEntity(chapterId, bookId, 1, bookTitle, 1);
|
||||||
chapterRepository.save(chapter);
|
chapterRepository.save(chapter);
|
||||||
|
|
||||||
// Step 1: Parse every page with Marker — correct reading order + pre-cropped figures
|
// Step 1: Parse with Marker — JSON (structured) + Markdown (per-page) in parallel
|
||||||
List<PageResult> pageResults = markerPageParser.parse(pdfPath);
|
ParsedBook parsed = markerPageParser.parse(pdfPath);
|
||||||
|
|
||||||
|
List<PageResult> pageResults = parsed.pages();
|
||||||
|
|
||||||
// Step 2: Build SectionEntity per page and persist
|
// Step 2: Build SectionEntity per page and persist
|
||||||
List<SectionEntity> sections = buildAndSaveSections(bookId, bookTitle, chapterId, pageResults);
|
List<SectionEntity> sections = buildAndSaveSections(bookId, bookTitle, chapterId, pageResults);
|
||||||
@@ -105,22 +105,24 @@ public class BookEmbeddingService {
|
|||||||
for (SectionEntity section : sections) {
|
for (SectionEntity section : sections) {
|
||||||
allChunks.addAll(textChunkingService.chunk(section, bookTitle));
|
allChunks.addAll(textChunkingService.chunk(section, bookTitle));
|
||||||
}
|
}
|
||||||
embedInBatches(allChunks, bookId);
|
if (skipEmbedding) {
|
||||||
log.info("Embedded {} text chunks for book {}", allChunks.size(), bookId);
|
log.info("skip-embedding=true — skipping text embedding for book {}", bookId);
|
||||||
|
} else {
|
||||||
|
embedInBatches(allChunks, bookId);
|
||||||
|
log.info("Embedded {} text chunks for book {}", allChunks.size(), bookId);
|
||||||
|
}
|
||||||
|
|
||||||
// Step 4: Decode pre-cropped figures from Marker output
|
// Step 4: Decode pre-cropped figures from Marker output
|
||||||
FigureExtractionService.ExtractionResult extraction =
|
FigureExtractionService.ExtractionResult extraction =
|
||||||
figureExtractionService.extract(bookId, chapterId, pageResults);
|
figureExtractionService.extract(bookId, chapterId, pageResults);
|
||||||
List<FigureEntity> figures = extraction.figures();
|
List<FigureEntity> figures = extraction.figures();
|
||||||
|
|
||||||
// Step 4b: Upload per-page markdown with resolved figure URLs to S3
|
// Step 4b: Save per-page HTML to S3, replacing Marker image src with API URLs
|
||||||
for (PageResult page : pageResults) {
|
parsed.htmlByPage().forEach((pageNumber, html) -> {
|
||||||
if (!page.markdown().isBlank()) {
|
String resolved = resolveImageSrcs(html, bookId, extraction.blockIdToFigureId());
|
||||||
String resolved = resolvePlaceholders(page.markdown(), bookId,
|
markdownStorageService.save(bookId, pageNumber, resolved);
|
||||||
extraction.blockIdToFigureId());
|
});
|
||||||
markdownStorageService.save(bookId, page.pageNumber(), resolved);
|
log.info("Saved {} HTML pages to S3 for book {}", parsed.htmlByPage().size(), bookId);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 5: Vision analysis (description + visible text) → embed figure chunks
|
// Step 5: Vision analysis (description + visible text) → embed figure chunks
|
||||||
for (FigureEntity figure : figures) {
|
for (FigureEntity figure : figures) {
|
||||||
@@ -139,11 +141,12 @@ public class BookEmbeddingService {
|
|||||||
+ (analysis.imageText().isEmpty() ? "" : "\n" + analysis.imageText());
|
+ (analysis.imageText().isEmpty() ? "" : "\n" + analysis.imageText());
|
||||||
|
|
||||||
String embeddingId = UUID.randomUUID().toString();
|
String embeddingId = UUID.randomUUID().toString();
|
||||||
Document figureDoc = new Document(embeddingId, embeddingContent,
|
if (!skipEmbedding) {
|
||||||
buildFigureMetadata(figure, bookTitle, embeddingId, analysis.imageText()));
|
Document figureDoc = new Document(embeddingId, embeddingContent,
|
||||||
vectorStore.add(List.of(figureDoc));
|
buildFigureMetadata(figure, bookTitle, embeddingId, analysis.imageText()));
|
||||||
|
vectorStore.add(List.of(figureDoc));
|
||||||
figure.setCaptionEmbeddingId(UUID.fromString(embeddingId));
|
figure.setCaptionEmbeddingId(UUID.fromString(embeddingId));
|
||||||
|
}
|
||||||
figureRepository.save(figure);
|
figureRepository.save(figure);
|
||||||
}
|
}
|
||||||
log.info("Embedded {} figure chunks for book {}", figures.size(), bookId);
|
log.info("Embedded {} figure chunks for book {}", figures.size(), bookId);
|
||||||
@@ -252,25 +255,20 @@ public class BookEmbeddingService {
|
|||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Replaces {@code marker://{blockId}} placeholders with resolved API URLs. */
|
/**
|
||||||
private String resolvePlaceholders(String markdown, UUID bookId,
|
* Replaces Marker's {@code src='{blockId}'} image attributes with resolved API URLs.
|
||||||
Map<String, String> blockIdToFigureId) {
|
* Block IDs look like {@code /page/0/Figure/2}.
|
||||||
Matcher m = MARKER_PLACEHOLDER.matcher(markdown);
|
*/
|
||||||
StringBuilder sb = new StringBuilder();
|
private String resolveImageSrcs(String html, UUID bookId, Map<String, String> blockIdToFigureId) {
|
||||||
while (m.find()) {
|
for (Map.Entry<String, String> entry : blockIdToFigureId.entrySet()) {
|
||||||
String altText = m.group(1);
|
String blockId = entry.getKey();
|
||||||
String blockId = m.group(2);
|
String figureId = entry.getValue();
|
||||||
String figureId = blockIdToFigureId.get(blockId);
|
String apiUrl = "/api/v1/figures/" + bookId + "/" + figureId + ".png";
|
||||||
if (figureId != null) {
|
// Marker emits both single and double-quoted src attributes
|
||||||
String url = "/api/v1/figures/" + bookId + "/" + figureId + ".png";
|
html = html.replace("src='" + blockId + "'", "src='" + apiUrl + "'");
|
||||||
m.appendReplacement(sb, "");
|
|
||||||
} else {
|
|
||||||
m.appendReplacement(sb, ""); // figure was filtered out (too small, etc.)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
m.appendTail(sb);
|
return html;
|
||||||
return sb.toString().strip();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private String truncate(String msg, int max) {
|
private String truncate(String msg, int max) {
|
||||||
|
|||||||
@@ -18,17 +18,22 @@ import java.nio.file.Path;
|
|||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses a PDF using the local Marker server ({@code POST /marker/upload}).
|
* Parses a PDF with a single call to the Marker server using {@code output_format=json}.
|
||||||
*
|
*
|
||||||
* <p>A single HTTP call returns:
|
* <p>The JSON response contains an {@code output} field that is itself a JSON string with a
|
||||||
|
* tree structure: the root has a {@code children} array where each item is a {@code Page} block.
|
||||||
|
* Each block carries an {@code html} field with {@code <content-ref src='blockId'>} placeholders
|
||||||
|
* that reference its {@code children} by ID.
|
||||||
|
*
|
||||||
|
* <p>{@link #jsonToHtml} mirrors the Marker Python {@code json_to_html} utility: it walks the
|
||||||
|
* tree recursively and resolves every {@code content-ref} with the rendered HTML of the
|
||||||
|
* referenced child block.
|
||||||
|
*
|
||||||
|
* <p>Returns a {@link ParsedBook} with:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>Reading-order text blocks — correct for multi-column and scanned pages</li>
|
* <li>{@code pages} — one {@link PageResult} per non-empty page (drives embeddings)</li>
|
||||||
* <li>Section headings extracted from {@code SectionHeader} blocks</li>
|
* <li>{@code htmlByPage} — full resolved HTML per page (saved to S3 for the reader)</li>
|
||||||
* <li>Pre-cropped figure images as base64-encoded PNG in each {@code Figure} block's
|
|
||||||
* {@code images} map</li>
|
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
|
||||||
* <p>The response is mapped to one {@link PageResult} per page block.
|
|
||||||
*/
|
*/
|
||||||
@Service
|
@Service
|
||||||
public class MarkerPageParser {
|
public class MarkerPageParser {
|
||||||
@@ -36,24 +41,21 @@ public class MarkerPageParser {
|
|||||||
private static final Logger log = LoggerFactory.getLogger(MarkerPageParser.class);
|
private static final Logger log = LoggerFactory.getLogger(MarkerPageParser.class);
|
||||||
|
|
||||||
private static final Set<String> TEXT_BLOCK_TYPES = Set.of(
|
private static final Set<String> TEXT_BLOCK_TYPES = Set.of(
|
||||||
"Text", "TextInlineMath", "ListItem", "Table", "Code", "Equation",
|
"Text", "TextInlineMath", "ListItem", "Table", "TableOfContents", "Code", "Equation",
|
||||||
"Footnote", "Caption", "PageHeader", "PageFooter", "Handwriting"
|
"Footnote", "Caption", "PageHeader", "PageFooter", "Handwriting"
|
||||||
);
|
);
|
||||||
private static final Set<String> FIGURE_BLOCK_TYPES = Set.of("Figure", "Picture", "FigureGroup", "PictureGroup");
|
private static final Set<String> FIGURE_BLOCK_TYPES = Set.of("Figure", "Picture", "FigureGroup", "PictureGroup");
|
||||||
|
|
||||||
private final RestClient restClient;
|
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||||
private final ObjectMapper objectMapper;
|
|
||||||
|
|
||||||
public MarkerPageParser(@Qualifier("markerRestClient") RestClient restClient, ObjectMapper objectMapper) {
|
private final RestClient restClient;
|
||||||
|
|
||||||
|
public MarkerPageParser(@Qualifier("markerRestClient") RestClient restClient) {
|
||||||
this.restClient = restClient;
|
this.restClient = restClient;
|
||||||
this.objectMapper = objectMapper;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public ParsedBook parse(Path pdfPath) {
|
||||||
* Parses the entire PDF and returns one {@link PageResult} per non-empty page.
|
log.info("Submitting {} to Marker (json)", pdfPath.getFileName());
|
||||||
*/
|
|
||||||
public List<PageResult> parse(Path pdfPath) {
|
|
||||||
log.info("Submitting {} to Marker for parsing", pdfPath.getFileName());
|
|
||||||
|
|
||||||
MultiValueMap<String, Object> body = new LinkedMultiValueMap<>();
|
MultiValueMap<String, Object> body = new LinkedMultiValueMap<>();
|
||||||
body.add("file", new FileSystemResource(pdfPath));
|
body.add("file", new FileSystemResource(pdfPath));
|
||||||
@@ -67,207 +69,219 @@ public class MarkerPageParser {
|
|||||||
.body(JsonNode.class);
|
.body(JsonNode.class);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Path debugFile = Path.of("/tmp/marker-response-md.json");
|
Files.writeString(Path.of("/tmp/marker-response-json.json"), response.toPrettyString());
|
||||||
Files.writeString(debugFile, response.toPrettyString());
|
|
||||||
log.info("Marker response saved to {}", debugFile);
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
log.warn("Could not save Marker response to file", e);
|
log.warn("Could not save Marker response to /tmp/marker-response-json.json", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<PageResult> results = parseResponse(response);
|
List<JsonNode> pageNodes = extractPages(response);
|
||||||
log.info("Marker produced {} page results from {}", results.size(), pdfPath.getFileName());
|
if (pageNodes.isEmpty()) {
|
||||||
return results;
|
log.warn("Marker returned no pages for {}", pdfPath.getFileName());
|
||||||
|
return new ParsedBook(List.of(), Map.of());
|
||||||
|
}
|
||||||
|
log.info("Marker returned {} pages for {}", pageNodes.size(), pdfPath.getFileName());
|
||||||
|
|
||||||
|
List<PageResult> pages = new ArrayList<>();
|
||||||
|
Map<Integer, String> htmlByPage = new LinkedHashMap<>();
|
||||||
|
|
||||||
|
for (int i = 0; i < pageNodes.size(); i++) {
|
||||||
|
JsonNode pageNode = pageNodes.get(i);
|
||||||
|
int pageNumber = i + 1; // 1-based
|
||||||
|
|
||||||
|
PageResult result = buildPageResult(pageNode, pageNumber);
|
||||||
|
String html = jsonToHtml(pageNode);
|
||||||
|
|
||||||
|
if (!result.orderedText().isBlank() || !result.figures().isEmpty()) {
|
||||||
|
pages.add(result);
|
||||||
|
htmlByPage.put(pageNumber, html);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Marker produced {} non-empty pages from {}", pages.size(), pdfPath.getFileName());
|
||||||
|
return new ParsedBook(pages, htmlByPage);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Private helpers ---
|
// ── Page extraction ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
private List<PageResult> parseResponse(JsonNode response) {
|
/**
|
||||||
|
* Parses the {@code output} JSON string and returns the list of page nodes
|
||||||
|
* (the top-level {@code children} of the document root).
|
||||||
|
*/
|
||||||
|
private List<JsonNode> extractPages(JsonNode response) {
|
||||||
if (response == null) return List.of();
|
if (response == null) return List.of();
|
||||||
|
|
||||||
// The "output" field is a JSON-encoded string — parse it first.
|
|
||||||
// Fall back to treating the whole response as the root if "output" is absent.
|
|
||||||
JsonNode root;
|
|
||||||
JsonNode outputNode = response.path("output");
|
JsonNode outputNode = response.path("output");
|
||||||
if (!outputNode.isMissingNode() && outputNode.isTextual()) {
|
if (outputNode.isMissingNode()) {
|
||||||
try {
|
log.warn("Marker response has no 'output' field");
|
||||||
root = objectMapper.readTree(outputNode.asText());
|
|
||||||
} catch (tools.jackson.core.JacksonException e) {
|
|
||||||
log.warn("Could not parse Marker 'output' field as JSON", e);
|
|
||||||
return List.of();
|
|
||||||
}
|
|
||||||
} else if (!outputNode.isMissingNode()) {
|
|
||||||
root = outputNode;
|
|
||||||
} else {
|
|
||||||
root = response;
|
|
||||||
}
|
|
||||||
|
|
||||||
JsonNode children = root.path("children");
|
|
||||||
if (children.isMissingNode() || !children.isArray()) {
|
|
||||||
log.warn("Marker response has no 'children' array — empty result");
|
|
||||||
return List.of();
|
return List.of();
|
||||||
}
|
}
|
||||||
|
|
||||||
List<PageResult> results = new ArrayList<>();
|
|
||||||
int pageIndex = 0;
|
|
||||||
for (JsonNode pageBlock : children) {
|
|
||||||
String blockType = pageBlock.path("block_type").asText();
|
|
||||||
if (!"Page".equals(blockType)) continue;
|
|
||||||
|
|
||||||
int pageNumber = pageIndex + 1;
|
|
||||||
pageIndex++;
|
|
||||||
|
|
||||||
PageResult result = parsePage(pageBlock, pageNumber);
|
|
||||||
if (!result.orderedText().isBlank() || !result.figures().isEmpty()) {
|
|
||||||
results.add(result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
private PageResult parsePage(JsonNode pageBlock, int pageNumber) {
|
|
||||||
JsonNode children = pageBlock.path("children");
|
|
||||||
if (children.isMissingNode() || !children.isArray()) {
|
|
||||||
return new PageResult(pageNumber, "", null, List.of(), "");
|
|
||||||
}
|
|
||||||
|
|
||||||
StringBuilder textBuilder = new StringBuilder();
|
|
||||||
StringBuilder markdownBuilder = new StringBuilder();
|
|
||||||
String headingTitle = null;
|
|
||||||
List<PageResult.FigureData> figures = new ArrayList<>();
|
|
||||||
Set<Integer> consumed = new HashSet<>(); // indices of Caption nodes consumed by a figure
|
|
||||||
|
|
||||||
List<JsonNode> childList = new ArrayList<>();
|
|
||||||
children.forEach(childList::add);
|
|
||||||
|
|
||||||
for (int i = 0; i < childList.size(); i++) {
|
|
||||||
if (consumed.contains(i)) continue;
|
|
||||||
|
|
||||||
JsonNode child = childList.get(i);
|
|
||||||
String type = child.path("block_type").asText();
|
|
||||||
|
|
||||||
if ("SectionHeader".equals(type)) {
|
|
||||||
String heading = stripHtml(child.path("html").asText()).strip();
|
|
||||||
if (!heading.isEmpty() && headingTitle == null) {
|
|
||||||
headingTitle = heading;
|
|
||||||
}
|
|
||||||
appendText(textBuilder, heading);
|
|
||||||
appendMarkdown(markdownBuilder, "## " + heading);
|
|
||||||
|
|
||||||
} else if (TEXT_BLOCK_TYPES.contains(type)) {
|
|
||||||
String text = stripHtml(child.path("html").asText());
|
|
||||||
appendText(textBuilder, text);
|
|
||||||
appendMarkdown(markdownBuilder, text.strip());
|
|
||||||
|
|
||||||
} else if (FIGURE_BLOCK_TYPES.contains(type)) {
|
|
||||||
extractFigures(child, i, childList, figures, markdownBuilder, consumed);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new PageResult(pageNumber, textBuilder.toString().strip(), headingTitle,
|
|
||||||
figures, markdownBuilder.toString().strip());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Handles a figure/picture block at {@code index} in {@code siblings}.
|
|
||||||
* For group blocks (FigureGroup, PictureGroup) the image lives in a child Picture/Figure,
|
|
||||||
* and the caption is a sibling Caption child inside the group.
|
|
||||||
* For leaf blocks the caption is the next sibling in the page child list.
|
|
||||||
* Image refs are appended to {@code markdown} as {@code }.
|
|
||||||
* Consumed caption sibling indices are added to {@code consumed}.
|
|
||||||
*/
|
|
||||||
private void extractFigures(JsonNode block, int index, List<JsonNode> siblings,
|
|
||||||
List<PageResult.FigureData> out, StringBuilder markdown,
|
|
||||||
Set<Integer> consumed) {
|
|
||||||
String type = block.path("block_type").asText();
|
|
||||||
boolean isGroup = type.endsWith("Group");
|
|
||||||
|
|
||||||
if (isGroup) {
|
|
||||||
JsonNode groupChildren = block.path("children");
|
|
||||||
if (groupChildren.isMissingNode() || !groupChildren.isArray()) return;
|
|
||||||
|
|
||||||
String groupCaption = null;
|
|
||||||
for (JsonNode sub : groupChildren) {
|
|
||||||
if ("Caption".equals(sub.path("block_type").asText())) {
|
|
||||||
String c = stripHtml(sub.path("html").asText()).strip();
|
|
||||||
if (!c.isEmpty()) groupCaption = c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (JsonNode sub : groupChildren) {
|
|
||||||
String subType = sub.path("block_type").asText();
|
|
||||||
if ("Figure".equals(subType) || "Picture".equals(subType)) {
|
|
||||||
String blockId = sub.path("id").asText();
|
|
||||||
byte[] imageBytes = extractImageBytes(sub, blockId);
|
|
||||||
if (imageBytes != null) {
|
|
||||||
out.add(new PageResult.FigureData(imageBytes, groupCaption, blockId));
|
|
||||||
String altText = groupCaption != null ? groupCaption : blockId;
|
|
||||||
appendMarkdown(markdown, "");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
String blockId = block.path("id").asText();
|
|
||||||
byte[] imageBytes = extractImageBytes(block, blockId);
|
|
||||||
if (imageBytes != null) {
|
|
||||||
String caption = null;
|
|
||||||
if (index + 1 < siblings.size()) {
|
|
||||||
JsonNode next = siblings.get(index + 1);
|
|
||||||
if ("Caption".equals(next.path("block_type").asText())) {
|
|
||||||
String c = stripHtml(next.path("html").asText()).strip();
|
|
||||||
if (!c.isEmpty()) caption = c;
|
|
||||||
consumed.add(index + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
out.add(new PageResult.FigureData(imageBytes, caption, blockId));
|
|
||||||
String altText = caption != null ? caption : blockId;
|
|
||||||
appendMarkdown(markdown, "");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts and base64-decodes the image bytes for this block.
|
|
||||||
* Marker stores images in the block's {@code images} map keyed by block ID.
|
|
||||||
*/
|
|
||||||
private byte[] extractImageBytes(JsonNode block, String blockId) {
|
|
||||||
JsonNode images = block.path("images");
|
|
||||||
if (images.isMissingNode() || images.isEmpty()) return null;
|
|
||||||
|
|
||||||
// Try the block's own ID first, then fall back to the first entry
|
|
||||||
JsonNode imgNode = images.path(blockId);
|
|
||||||
if (imgNode.isMissingNode()) {
|
|
||||||
imgNode = images.properties().stream()
|
|
||||||
.findFirst()
|
|
||||||
.map(e -> e.getValue())
|
|
||||||
.orElse(imgNode);
|
|
||||||
}
|
|
||||||
|
|
||||||
String base64 = imgNode.asText();
|
|
||||||
if (base64.isEmpty()) return null;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return Base64.getDecoder().decode(base64);
|
JsonNode root = MAPPER.readTree(outputNode.stringValue());
|
||||||
} catch (IllegalArgumentException ex) {
|
JsonNode children = root.path("children");
|
||||||
log.warn("Could not decode base64 image for block {}: {}", blockId, ex.getMessage());
|
if (children.isMissingNode() || !children.isArray()) {
|
||||||
return null;
|
log.warn("Marker output root has no 'children' array");
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
List<JsonNode> result = new ArrayList<>();
|
||||||
|
children.forEach(result::add);
|
||||||
|
return result;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Could not parse Marker 'output' string as JSON: {}", e.getMessage());
|
||||||
|
return List.of();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── HTML rendering ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Java equivalent of the Marker Python {@code json_to_html} utility.
|
||||||
|
*
|
||||||
|
* <p>Algorithm:
|
||||||
|
* <ol>
|
||||||
|
* <li>If the block has no children, return its {@code html} as-is (leaf node).</li>
|
||||||
|
* <li>Otherwise recursively render each child, then replace every
|
||||||
|
* {@code <content-ref src='childId'>} placeholder in the block's own {@code html}
|
||||||
|
* with the rendered child HTML.</li>
|
||||||
|
* </ol>
|
||||||
|
*/
|
||||||
|
String jsonToHtml(JsonNode block) {
|
||||||
|
String html = str(block.path("html"));
|
||||||
|
|
||||||
|
// If the block carries image data, inject <img> data-URI tags.
|
||||||
|
// Marker stores base64 image bytes in block.images keyed by block ID.
|
||||||
|
// Picture/Figure leaf blocks have empty html, so this is the only way to
|
||||||
|
// get the image into the rendered output.
|
||||||
|
JsonNode images = block.path("images");
|
||||||
|
if (!images.isMissingNode() && !images.isNull() && !images.isEmpty()) {
|
||||||
|
StringBuilder imgTags = new StringBuilder();
|
||||||
|
images.properties().forEach(entry -> {
|
||||||
|
String base64 = str(entry.getValue());
|
||||||
|
if (!base64.isEmpty()) {
|
||||||
|
String mime = detectImageMime(base64);
|
||||||
|
imgTags.append("<img src=\"data:").append(mime)
|
||||||
|
.append(";base64,").append(base64).append("\">");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (!imgTags.isEmpty()) {
|
||||||
|
html = html + imgTags;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
JsonNode children = block.path("children");
|
||||||
|
if (children.isMissingNode() || children.isNull() || !children.isArray() || children.isEmpty()) {
|
||||||
|
return html; // leaf node
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build id → rendered-html map for all direct children
|
||||||
|
Map<String, String> childHtml = new LinkedHashMap<>();
|
||||||
|
for (JsonNode child : children) {
|
||||||
|
String id = str(child.path("id"));
|
||||||
|
childHtml.put(id, jsonToHtml(child));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace every <content-ref src='id'></content-ref> with the child's HTML
|
||||||
|
for (Map.Entry<String, String> entry : childHtml.entrySet()) {
|
||||||
|
String ref = "<content-ref src='" + entry.getKey() + "'></content-ref>";
|
||||||
|
html = html.replace(ref, entry.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
return html;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── PageResult (text + figures for embeddings) ────────────────────────────
|
||||||
|
|
||||||
|
private PageResult buildPageResult(JsonNode pageBlock, int pageNumber) {
|
||||||
|
StringBuilder text = new StringBuilder();
|
||||||
|
String[] headingTitle = {null};
|
||||||
|
List<PageResult.FigureData> figures = new ArrayList<>();
|
||||||
|
|
||||||
|
walkBlock(pageBlock, text, headingTitle, figures);
|
||||||
|
return new PageResult(pageNumber, text.toString().strip(), headingTitle[0], figures);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Recursively walks the block tree, collecting text and figures in reading order. */
|
||||||
|
private void walkBlock(JsonNode block, StringBuilder text, String[] headingTitle,
|
||||||
|
List<PageResult.FigureData> figures) {
|
||||||
|
String type = str(block.path("block_type"));
|
||||||
|
|
||||||
|
if ("SectionHeader".equals(type)) {
|
||||||
|
String heading = stripHtml(str(block.path("html"))).strip();
|
||||||
|
if (!heading.isEmpty() && headingTitle[0] == null) headingTitle[0] = heading;
|
||||||
|
appendText(text, heading);
|
||||||
|
|
||||||
|
} else if (TEXT_BLOCK_TYPES.contains(type)) {
|
||||||
|
appendText(text, stripHtml(str(block.path("html"))));
|
||||||
|
|
||||||
|
} else if (FIGURE_BLOCK_TYPES.contains(type)) {
|
||||||
|
String caption = findCaption(block);
|
||||||
|
extractFigures(block, caption, figures);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recurse into children (content-ref ordering is implicit via tree order)
|
||||||
|
JsonNode children = block.path("children");
|
||||||
|
if (!children.isMissingNode() && !children.isNull() && children.isArray()) {
|
||||||
|
for (JsonNode child : children) {
|
||||||
|
walkBlock(child, text, headingTitle, figures);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Finds the first Caption child inside a figure block, if any. */
|
||||||
|
private String findCaption(JsonNode figureBlock) {
|
||||||
|
JsonNode children = figureBlock.path("children");
|
||||||
|
if (children.isMissingNode() || !children.isArray()) return null;
|
||||||
|
for (JsonNode child : children) {
|
||||||
|
if ("Caption".equals(str(child.path("block_type")))) {
|
||||||
|
String caption = stripHtml(str(child.path("html"))).strip();
|
||||||
|
return caption.isEmpty() ? null : caption;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void extractFigures(JsonNode block, String caption, List<PageResult.FigureData> out) {
|
||||||
|
JsonNode images = block.path("images");
|
||||||
|
if (images.isMissingNode() || images.isEmpty()) return;
|
||||||
|
|
||||||
|
images.properties().forEach(entry -> {
|
||||||
|
String blockId = entry.getKey();
|
||||||
|
String base64 = str(entry.getValue());
|
||||||
|
if (base64.isEmpty()) return;
|
||||||
|
try {
|
||||||
|
byte[] bytes = Base64.getDecoder().decode(base64);
|
||||||
|
out.add(new PageResult.FigureData(bytes, caption, blockId));
|
||||||
|
} catch (IllegalArgumentException ex) {
|
||||||
|
log.warn("Could not decode base64 image for block {}: {}", blockId, ex.getMessage());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Utilities ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
private void appendText(StringBuilder sb, String text) {
|
private void appendText(StringBuilder sb, String text) {
|
||||||
|
if (text == null) return;
|
||||||
String stripped = text.strip();
|
String stripped = text.strip();
|
||||||
if (stripped.isEmpty()) return;
|
if (stripped.isEmpty()) return;
|
||||||
if (sb.length() > 0) sb.append("\n\n");
|
if (sb.length() > 0) sb.append("\n\n");
|
||||||
sb.append(stripped);
|
sb.append(stripped);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void appendMarkdown(StringBuilder sb, String text) {
|
|
||||||
if (text == null || text.isBlank()) return;
|
|
||||||
if (sb.length() > 0) sb.append("\n\n");
|
|
||||||
sb.append(text.strip());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Strips HTML tags and normalises whitespace. */
|
|
||||||
private String stripHtml(String html) {
|
private String stripHtml(String html) {
|
||||||
if (html == null || html.isEmpty()) return "";
|
if (html == null || html.isEmpty()) return "";
|
||||||
return html.replaceAll("<[^>]*>", "").replaceAll("\\s{2,}", " ").strip();
|
return html.replaceAll("<[^>]*>", "").replaceAll("\\s{2,}", " ").strip();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Detects MIME type from the first characters of a base64-encoded image. */
|
||||||
|
private static String detectImageMime(String base64) {
|
||||||
|
if (base64.startsWith("/9j/")) return "image/jpeg";
|
||||||
|
if (base64.startsWith("iVBOR")) return "image/png";
|
||||||
|
if (base64.startsWith("R0lGO")) return "image/gif";
|
||||||
|
if (base64.startsWith("UklGR")) return "image/webp";
|
||||||
|
return "image/png"; // safe fallback
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Null-safe string extraction from a JsonNode (Jackson 3: stringValue() returns null for non-strings). */
|
||||||
|
private static String str(JsonNode node) {
|
||||||
|
String v = node.stringValue();
|
||||||
|
return v != null ? v : "";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,8 +10,7 @@ public record PageResult(
|
|||||||
int pageNumber, // 1-based, derived from Marker page block index
|
int pageNumber, // 1-based, derived from Marker page block index
|
||||||
String orderedText, // full page text in correct reading order (blocks joined by \n\n)
|
String orderedText, // full page text in correct reading order (blocks joined by \n\n)
|
||||||
String headingTitle, // first SectionHeader block on page, or null
|
String headingTitle, // first SectionHeader block on page, or null
|
||||||
List<FigureData> figures, // extracted figure images (may be empty)
|
List<FigureData> figures // extracted figure images (may be empty)
|
||||||
String markdown // markdown representation with marker://{blockId} image placeholders
|
|
||||||
) {
|
) {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package com.aiteacher.document;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of a full Marker parse: structured page data (from JSON) plus
|
||||||
|
* native per-page markdown (from the separate Markdown API call).
|
||||||
|
*
|
||||||
|
* @param pages one entry per non-empty page, derived from the chunks response
|
||||||
|
* @param htmlByPage concatenated block HTML keyed by 1-based page number
|
||||||
|
*/
|
||||||
|
public record ParsedBook(
|
||||||
|
List<PageResult> pages,
|
||||||
|
Map<Integer, String> htmlByPage
|
||||||
|
) {}
|
||||||
@@ -53,7 +53,7 @@ public class S3MarkdownStorageService implements MarkdownStorageService {
|
|||||||
byte[] bytes = markdown.getBytes(StandardCharsets.UTF_8);
|
byte[] bytes = markdown.getBytes(StandardCharsets.UTF_8);
|
||||||
s3.putObject(
|
s3.putObject(
|
||||||
PutObjectRequest.builder().bucket(bucket).key(key)
|
PutObjectRequest.builder().bucket(bucket).key(key)
|
||||||
.contentType("text/markdown; charset=utf-8")
|
.contentType("text/html; charset=utf-8")
|
||||||
.contentLength((long) bytes.length).build(),
|
.contentLength((long) bytes.length).build(),
|
||||||
RequestBody.fromBytes(bytes));
|
RequestBody.fromBytes(bytes));
|
||||||
return key;
|
return key;
|
||||||
@@ -69,7 +69,7 @@ public class S3MarkdownStorageService implements MarkdownStorageService {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void deleteAll(UUID bookId) {
|
public void deleteAll(UUID bookId) {
|
||||||
String prefix = "markdown/" + bookId + "/";
|
String prefix = "html/" + bookId + "/";
|
||||||
try {
|
try {
|
||||||
List<ObjectIdentifier> toDelete = new ArrayList<>();
|
List<ObjectIdentifier> toDelete = new ArrayList<>();
|
||||||
s3.listObjectsV2Paginator(ListObjectsV2Request.builder()
|
s3.listObjectsV2Paginator(ListObjectsV2Request.builder()
|
||||||
@@ -92,6 +92,6 @@ public class S3MarkdownStorageService implements MarkdownStorageService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static String key(UUID bookId, int pageNumber) {
|
private static String key(UUID bookId, int pageNumber) {
|
||||||
return "markdown/" + bookId + "/page-" + pageNumber + ".md";
|
return "html/" + bookId + "/page-" + pageNumber + ".html";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,6 +52,9 @@ logging:
|
|||||||
"[org.apache.pdfbox]": ERROR
|
"[org.apache.pdfbox]": ERROR
|
||||||
|
|
||||||
app:
|
app:
|
||||||
|
features:
|
||||||
|
upload-enabled: ${UPLOAD_ENABLED:true}
|
||||||
|
delete-enabled: ${DELETE_ENABLED:true}
|
||||||
auth:
|
auth:
|
||||||
password: ${APP_PASSWORD:changeme}
|
password: ${APP_PASSWORD:changeme}
|
||||||
figure-storage:
|
figure-storage:
|
||||||
@@ -64,5 +67,6 @@ app:
|
|||||||
embedding:
|
embedding:
|
||||||
batch-size: 20
|
batch-size: 20
|
||||||
batch-delay-ms: 2000
|
batch-delay-ms: 2000
|
||||||
|
skip-embedding: true
|
||||||
marker:
|
marker:
|
||||||
base-url: ${MARKER_BASE_URL:http://localhost:8000}
|
base-url: ${MARKER_BASE_URL:http://192.168.1.105:8000}
|
||||||
|
|||||||
@@ -5,3 +5,9 @@ VITE_API_URL=/api/v1
|
|||||||
|
|
||||||
# Shared password for HTTP Basic auth (must match APP_PASSWORD on the backend).
|
# Shared password for HTTP Basic auth (must match APP_PASSWORD on the backend).
|
||||||
VITE_APP_PASSWORD=changeme
|
VITE_APP_PASSWORD=changeme
|
||||||
|
|
||||||
|
# Set to 'false' to hide the upload UI (frontend). Also set UPLOAD_ENABLED=false on the backend to block the endpoint.
|
||||||
|
VITE_UPLOAD_ENABLED=true
|
||||||
|
|
||||||
|
# Set to 'false' to hide the delete button (frontend). Also set DELETE_ENABLED=false on the backend to block the endpoint.
|
||||||
|
VITE_DELETE_ENABLED=true
|
||||||
|
|||||||
@@ -64,11 +64,11 @@ body {
|
|||||||
Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif;
|
Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif;
|
||||||
background: #f0f4f8;
|
background: #f0f4f8;
|
||||||
color: #2d3748;
|
color: #2d3748;
|
||||||
min-height: 100vh;
|
height: 100vh;
|
||||||
}
|
}
|
||||||
|
|
||||||
#app {
|
#app {
|
||||||
min-height: 100vh;
|
height: 100vh;
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
}
|
}
|
||||||
@@ -133,6 +133,9 @@ body {
|
|||||||
|
|
||||||
.main-content {
|
.main-content {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
|
min-height: 0;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
padding: 2rem;
|
padding: 2rem;
|
||||||
max-width: 1200px;
|
max-width: 1200px;
|
||||||
margin: 0 auto;
|
margin: 0 auto;
|
||||||
|
|||||||
@@ -41,6 +41,7 @@
|
|||||||
Read
|
Read
|
||||||
</router-link>
|
</router-link>
|
||||||
<button
|
<button
|
||||||
|
v-if="deleteEnabled"
|
||||||
class="btn btn-danger"
|
class="btn btn-danger"
|
||||||
:disabled="book.status === 'PROCESSING' || deleting"
|
:disabled="book.status === 'PROCESSING' || deleting"
|
||||||
@click="$emit('delete', book.id)"
|
@click="$emit('delete', book.id)"
|
||||||
@@ -59,6 +60,7 @@ import type { Book } from '@/stores/bookStore'
|
|||||||
const props = defineProps<{
|
const props = defineProps<{
|
||||||
book: Book
|
book: Book
|
||||||
deleting?: boolean
|
deleting?: boolean
|
||||||
|
deleteEnabled?: boolean
|
||||||
}>()
|
}>()
|
||||||
|
|
||||||
defineEmits<{
|
defineEmits<{
|
||||||
|
|||||||
Vendored
+2
@@ -3,6 +3,8 @@
|
|||||||
interface ImportMetaEnv {
|
interface ImportMetaEnv {
|
||||||
readonly VITE_API_URL: string
|
readonly VITE_API_URL: string
|
||||||
readonly VITE_APP_PASSWORD: string
|
readonly VITE_APP_PASSWORD: string
|
||||||
|
readonly VITE_UPLOAD_ENABLED: string
|
||||||
|
readonly VITE_DELETE_ENABLED: string
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ImportMeta {
|
interface ImportMeta {
|
||||||
|
|||||||
@@ -44,7 +44,6 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, watch, onMounted } from 'vue'
|
import { ref, watch, onMounted } from 'vue'
|
||||||
import { useRoute } from 'vue-router'
|
import { useRoute } from 'vue-router'
|
||||||
import { marked } from 'marked'
|
|
||||||
import { api } from '@/services/api'
|
import { api } from '@/services/api'
|
||||||
import { useBookStore } from '@/stores/bookStore'
|
import { useBookStore } from '@/stores/bookStore'
|
||||||
import type { Book } from '@/stores/bookStore'
|
import type { Book } from '@/stores/bookStore'
|
||||||
@@ -104,15 +103,11 @@ async function loadPage(page: number) {
|
|||||||
activeBlobUrls = []
|
activeBlobUrls = []
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await api.get<string>(`/books/${bookId}/pages/${page}/markdown`, {
|
const res = await api.get<string>(`/books/${bookId}/pages/${page}/html`, {
|
||||||
headers: { Accept: 'text/plain' },
|
headers: { Accept: 'text/html' },
|
||||||
responseType: 'text'
|
responseType: 'text'
|
||||||
})
|
})
|
||||||
const markdownText = res.data
|
let html = await resolveImages(res.data)
|
||||||
|
|
||||||
// Render markdown to HTML, then resolve image src via authenticated fetch
|
|
||||||
let html = await marked.parse(markdownText) as string
|
|
||||||
html = await resolveImages(html)
|
|
||||||
renderedHtml.value = html
|
renderedHtml.value = html
|
||||||
} catch (e: any) {
|
} catch (e: any) {
|
||||||
error.value = e.message ?? 'Failed to load page.'
|
error.value = e.message ?? 'Failed to load page.'
|
||||||
@@ -123,8 +118,8 @@ async function loadPage(page: number) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds <img src="/api/v1/figures/..."> in the HTML, fetches each image
|
* Finds <img src="/api/v1/figures/..."> in the HTML, fetches each image
|
||||||
* with the authenticated axios instance (which carries Basic auth headers),
|
* through the authenticated axios instance, and replaces the src with a
|
||||||
* and replaces the src with a temporary blob URL so the browser can display it.
|
* temporary blob URL so the browser can render it without re-authenticating.
|
||||||
*/
|
*/
|
||||||
async function resolveImages(html: string): Promise<string> {
|
async function resolveImages(html: string): Promise<string> {
|
||||||
const srcPattern = /src="(\/api\/v1\/figures\/[^"]+)"/g
|
const srcPattern = /src="(\/api\/v1\/figures\/[^"]+)"/g
|
||||||
@@ -137,7 +132,7 @@ async function resolveImages(html: string): Promise<string> {
|
|||||||
await Promise.all(
|
await Promise.all(
|
||||||
unique.map(async (src) => {
|
unique.map(async (src) => {
|
||||||
try {
|
try {
|
||||||
const res = await api.get(src, { responseType: 'blob' })
|
const res = await api.get(src.replace(/^\/api\/v1/, ''), { responseType: 'blob' })
|
||||||
const blobUrl = URL.createObjectURL(res.data)
|
const blobUrl = URL.createObjectURL(res.data)
|
||||||
activeBlobUrls.push(blobUrl)
|
activeBlobUrls.push(blobUrl)
|
||||||
blobMap[src] = blobUrl
|
blobMap[src] = blobUrl
|
||||||
@@ -160,6 +155,8 @@ async function resolveImages(html: string): Promise<string> {
|
|||||||
gap: 1rem;
|
gap: 1rem;
|
||||||
max-width: 860px;
|
max-width: 860px;
|
||||||
margin: 0 auto;
|
margin: 0 auto;
|
||||||
|
flex: 1;
|
||||||
|
min-height: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.reader-header {
|
.reader-header {
|
||||||
@@ -238,6 +235,9 @@ async function resolveImages(html: string): Promise<string> {
|
|||||||
|
|
||||||
.reader-body {
|
.reader-body {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
|
min-height: 0;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
}
|
}
|
||||||
|
|
||||||
.reader-loading {
|
.reader-loading {
|
||||||
@@ -255,6 +255,9 @@ async function resolveImages(html: string): Promise<string> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.reader-content {
|
.reader-content {
|
||||||
|
flex: 1;
|
||||||
|
min-height: 0;
|
||||||
|
overflow-y: auto;
|
||||||
padding: 2rem;
|
padding: 2rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
<template>
|
<template>
|
||||||
<div class="upload-view">
|
<div class="upload-view">
|
||||||
<h1 class="page-title">Book Library</h1>
|
<h1 class="page-title">Book Library</h1>
|
||||||
<p class="page-subtitle">Upload medical textbooks (PDF) to build the knowledge base.</p>
|
<p v-if="uploadEnabled" class="page-subtitle">Upload medical textbooks (PDF) to build the knowledge base.</p>
|
||||||
|
|
||||||
<!-- Upload Section -->
|
<!-- Upload Section -->
|
||||||
<div class="upload-section card">
|
<div v-if="uploadEnabled" class="upload-section card">
|
||||||
<h2 class="section-title">Upload a Book</h2>
|
<h2 class="section-title">Upload a Book</h2>
|
||||||
|
|
||||||
<div
|
<div
|
||||||
@@ -87,6 +87,7 @@
|
|||||||
:key="book.id"
|
:key="book.id"
|
||||||
:book="book"
|
:book="book"
|
||||||
:deleting="deletingId === book.id"
|
:deleting="deletingId === book.id"
|
||||||
|
:delete-enabled="deleteEnabled"
|
||||||
@delete="handleDelete"
|
@delete="handleDelete"
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
@@ -99,6 +100,9 @@ import { ref, onMounted, onUnmounted, inject } from 'vue'
|
|||||||
import { useBookStore } from '@/stores/bookStore'
|
import { useBookStore } from '@/stores/bookStore'
|
||||||
import BookCard from '@/components/BookCard.vue'
|
import BookCard from '@/components/BookCard.vue'
|
||||||
|
|
||||||
|
const uploadEnabled = import.meta.env.VITE_UPLOAD_ENABLED !== 'false'
|
||||||
|
const deleteEnabled = import.meta.env.VITE_DELETE_ENABLED !== 'false'
|
||||||
|
|
||||||
const bookStore = useBookStore()
|
const bookStore = useBookStore()
|
||||||
const showToast = inject<(msg: string, type?: 'error' | 'success') => void>('showToast')
|
const showToast = inject<(msg: string, type?: 'error' | 'success') => void>('showToast')
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user