Enhance page parsing using JSON output and HTML

This commit is contained in:
Adrien
2026-04-05 21:55:30 +02:00
parent ea1276dc2e
commit 5c641f4bcc
9 changed files with 292 additions and 258 deletions
@@ -10,8 +10,7 @@ public record PageResult(
int pageNumber, // 1-based, derived from Marker page block index
String orderedText, // full page text in correct reading order (blocks joined by \n\n)
String headingTitle, // first SectionHeader block on page, or null
List<FigureData> figures, // extracted figure images (may be empty)
String markdown // markdown representation with marker://{blockId} image placeholders
List<FigureData> figures // extracted figure images (may be empty)
) {
/**