first implementation - image/drawing integration
This commit is contained in:
@@ -47,6 +47,16 @@ spring:
|
||||
max-size: 8
|
||||
queue-capacity: 50
|
||||
|
||||
logging:
|
||||
level:
|
||||
"[org.apache.pdfbox]": ERROR
|
||||
|
||||
app:
|
||||
auth:
|
||||
# NOTE(review): presumably a Spring property placeholder — when APP_PASSWORD is
# unset this falls back to the literal default "changeme". Confirm every deployed
# environment sets APP_PASSWORD so the fallback is never used outside local dev.
password: ${APP_PASSWORD:changeme}
|
||||
figure-storage:
|
||||
base-path: ${FIGURE_STORAGE_PATH:./uploads}
|
||||
min-image-size-px: 100
|
||||
embedding:
|
||||
batch-size: 20
|
||||
batch-delay-ms: 2000
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
-- ============================================================
|
||||
-- V4: Document hierarchy — chapter and section tables
|
||||
-- Supports parent-child retrieval pattern for RAG precision.
|
||||
-- ============================================================
|
||||
|
||||
-- chapter: one row per chapter, owned by a book.
-- Deleting the parent book row removes its chapters (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS chapter
(
    id         VARCHAR(200),                          -- no default: the writer supplies the key
    book_id    UUID                     NOT NULL,
    number     INTEGER                  NOT NULL DEFAULT 1,
    title      VARCHAR(500),                          -- nullable
    page_start INTEGER,                               -- nullable
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE
);
|
||||
|
||||
-- section: one row per section, carrying the section's complete text and page range.
-- Each row references both its chapter and its book; deleting either parent
-- removes the section (ON DELETE CASCADE on both foreign keys).
CREATE TABLE IF NOT EXISTS section
(
    id         VARCHAR(200),                          -- no default: the writer supplies the key
    chapter_id VARCHAR(200)             NOT NULL,
    book_id    UUID                     NOT NULL,
    number     VARCHAR(50),                           -- stored as text; nullable
    title      VARCHAR(500),                          -- nullable
    page_start INTEGER                  NOT NULL,
    page_end   INTEGER                  NOT NULL,
    full_text  TEXT                     NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (chapter_id) REFERENCES chapter (id) ON DELETE CASCADE,
    FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE
);
|
||||
|
||||
-- Indexes on the foreign-key columns of the hierarchy tables.
-- PostgreSQL does not index the referencing side of a foreign key on its own,
-- so every FK column that is filtered on, or walked by an ON DELETE CASCADE
-- from its parent, gets an explicit index here.
CREATE INDEX IF NOT EXISTS idx_section_book ON section (book_id);
CREATE INDEX IF NOT EXISTS idx_section_chapter ON section (chapter_id);

-- chapter.book_id cascades from book; without this index each book delete
-- (and each "chapters of a book" lookup) scans the whole chapter table.
CREATE INDEX IF NOT EXISTS idx_chapter_book ON chapter (book_id);
|
||||
@@ -0,0 +1,29 @@
|
||||
-- ============================================================
|
||||
-- V5: Figures and chunk-to-figure reference table
|
||||
-- figure: metadata + file path for each extracted image
|
||||
-- chunk_figure_ref: links vector-store chunks to figures
|
||||
-- ============================================================
|
||||
|
||||
-- figure: metadata and file path for one extracted image.
-- A figure is removed together with its book (ON DELETE CASCADE); when its
-- section or chapter row is deleted the figure is kept and the corresponding
-- reference is cleared (ON DELETE SET NULL).
CREATE TABLE IF NOT EXISTS figure
(
    id                   VARCHAR(200),                          -- no default: the writer supplies the key
    book_id              UUID                     NOT NULL,
    section_id           VARCHAR(200),                          -- nullable
    chapter_id           VARCHAR(200),                          -- nullable
    label                VARCHAR(100),                          -- nullable
    caption              TEXT,                                  -- nullable
    figure_type          VARCHAR(50)              NOT NULL,
    page                 INTEGER                  NOT NULL,
    image_path           VARCHAR(1000)            NOT NULL,
    caption_embedding_id UUID,                                  -- nullable; no FK constraint is declared for it
    created_at           TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE,
    FOREIGN KEY (section_id) REFERENCES section (id) ON DELETE SET NULL,
    FOREIGN KEY (chapter_id) REFERENCES chapter (id) ON DELETE SET NULL
);
|
||||
|
||||
-- chunk_figure_ref: junction table pairing a chunk id with a figure.
-- A (chunk_id, figure_id) pair can appear at most once. Rows disappear with
-- their figure (ON DELETE CASCADE); chunk_id carries no FK constraint.
CREATE TABLE IF NOT EXISTS chunk_figure_ref
(
    chunk_id     UUID         NOT NULL,
    figure_id    VARCHAR(200) NOT NULL,
    mention_page INTEGER,                 -- nullable

    PRIMARY KEY (chunk_id, figure_id),
    FOREIGN KEY (figure_id) REFERENCES figure (id) ON DELETE CASCADE
);
|
||||
|
||||
-- Indexes on the foreign-key columns of figure and chunk_figure_ref.
-- PostgreSQL does not index the referencing side of a foreign key on its own,
-- so parent deletes (CASCADE / SET NULL) and reverse lookups need these.
CREATE INDEX IF NOT EXISTS idx_figure_book ON figure (book_id);

-- Support "figures of a section/chapter" lookups and the ON DELETE SET NULL
-- actions fired when a section or chapter row is removed.
CREATE INDEX IF NOT EXISTS idx_figure_section ON figure (section_id);
CREATE INDEX IF NOT EXISTS idx_figure_chapter ON figure (chapter_id);

-- NOTE(review): the primary key (chunk_id, figure_id) already provides an index
-- whose leading column is chunk_id, so this index is likely redundant. It is
-- kept so the set of existing index names is unchanged; consider dropping it
-- in a later migration.
CREATE INDEX IF NOT EXISTS idx_cfr_chunk ON chunk_figure_ref (chunk_id);

-- figure_id is the second column of the primary key, so the PK index does not
-- serve figure -> chunks lookups or the cascade from figure efficiently.
CREATE INDEX IF NOT EXISTS idx_cfr_figure ON chunk_figure_ref (figure_id);
|
||||
Reference in New Issue
Block a user