5 Commits

Author SHA1 Message Date
Adrien 0c226483c0 fix deserialization error in native image 2026-04-18 20:46:16 +02:00
Adrien ff97c24a55 Add thai support in summary 2026-04-18 19:55:19 +02:00
Adrien c7a77af2f4 add new concept report 2026-04-18 17:54:54 +02:00
Adrien 5f03e1f41b improve topics and chat source display 2026-04-12 18:56:18 +02:00
Adrien c98fe9ceaa update readme 2026-04-12 18:25:12 +02:00
34 changed files with 2396 additions and 219 deletions
+72
View File
@@ -35,11 +35,13 @@ graph TD
EP3["Vision describe → embed caption"]
EP4["Chunk text → embed chunks"]
EP5["Link chunks ↔ figures"]
EP6["LLM enrich chunk\n(entities, facet, summary)\n→ chunk_metadata"]
EP1 --> EP2
EP1 --> EP4
EP2 --> EP3
EP4 --> EP5
EP3 --> EP5
EP4 --> EP6
end
subgraph "Retrieval Pipeline (per chat query)"
@@ -65,6 +67,50 @@ graph TD
end
```
### Concept Retrieval Pipeline (per concept report)
Concept retrieval is an alternative to the semantic-similarity flow above. It uses the
LLM-tagged `chunk_metadata` rows written at indexing time to exhaustively gather every
chunk that *concerns* a concept (e.g. "aneurysm"), bucketed by facet. One synthesis call
per facet yields a structured, multi-section report.
```mermaid
sequenceDiagram
participant User
participant FE as Frontend
participant BE as Backend (ConceptReportService)
participant Retr as ConceptRetriever
participant DB as chunk_metadata (GIN)
participant Vec as vector_store
participant LLM
User->>FE: Click "Generate Concept Report" on topic
FE->>BE: POST /api/v1/topics/{id}/concept-reports
loop per READY book
BE->>Retr: retrieveByConcept(topicName, bookId)
Retr->>DB: WHERE entities @> [canonical]
alt SQL hits found
DB-->>Retr: chunks grouped by facet
else no match (typo / synonym)
Retr->>Vec: similaritySearch topK=30
Vec-->>Retr: chunk ids
Retr->>DB: findByChunkIdIn → group by facet
end
end
BE->>BE: merge facets across books, assign global [S#]/[F#]
loop per non-empty facet
BE->>LLM: synthesize facet section (focused prompt)
LLM-->>BE: facet markdown
end
BE->>BE: persist concept_report
BE-->>FE: { facets[], sources[] }
FE->>User: render facet-labelled report + inline figures
```
Backfill path for already-embedded books:
`POST /api/v1/admin/books/{id}/enrich` scans `vector_store` for TEXT chunks missing
`chunk_metadata` rows and enriches them in place. Idempotent — re-running is a no-op.
## Marker API Response Structure
The PDF parsing pipeline calls a local [Marker](https://github.com/VikParuchuri/marker) server (`POST /marker/upload`).
@@ -179,6 +225,32 @@ mvn -Pnative package jib:build -DskipTests
mvn -Pnative jib:build -Djib.to.auth.username=admin -Djib.to.auth.password=""
```
### Backend build (buildah)
**JVM image** (`Dockerfile` — Eclipse Temurin 21):
```bash
buildah build \
--platform linux/arm64 \
--tag zot.immich-ad.ovh/ai-teacher-backend:latest \
backend/
buildah login zot.immich-ad.ovh
buildah push --tls-verify=false zot.immich-ad.ovh/ai-teacher-backend:latest
```
**Native image** (`Dockerfile.native` — GraalVM 25, produces a minimal Debian-slim image):
```bash
buildah build \
--platform linux/arm64 \
--file backend/Dockerfile.native \
--tag zot.immich-ad.ovh/ai-teacher-backend-native:latest \
backend/
buildah push --tls-verify=false zot.immich-ad.ovh/ai-teacher-backend-native:latest
```
### Frontend build
```bash
buildah build \
@@ -1,6 +1,8 @@
package com.aiteacher.book;
import com.aiteacher.document.*;
import com.aiteacher.enrichment.ChunkEnrichmentPipeline;
import com.aiteacher.enrichment.ChunkMetadataRepository;
import com.aiteacher.figure.FigureStorageService;
import org.slf4j.Logger;
@@ -35,6 +37,8 @@ public class BookEmbeddingService {
private final ChunkFigureRefRepository chunkFigureRefRepository;
private final FigureStorageService figureStorageService;
private final MarkdownStorageService markdownStorageService;
private final ChunkEnrichmentPipeline chunkEnrichmentPipeline;
private final ChunkMetadataRepository chunkMetadataRepository;
@Value("${app.embedding.batch-size:50}")
private int embeddingBatchSize;
@@ -58,7 +62,9 @@ public class BookEmbeddingService {
FigureRepository figureRepository,
ChunkFigureRefRepository chunkFigureRefRepository,
FigureStorageService figureStorageService,
MarkdownStorageService markdownStorageService) {
MarkdownStorageService markdownStorageService,
ChunkEnrichmentPipeline chunkEnrichmentPipeline,
ChunkMetadataRepository chunkMetadataRepository) {
this.vectorStore = vectorStore;
this.bookRepository = bookRepository;
this.markerPageParser = markerPageParser;
@@ -72,6 +78,8 @@ public class BookEmbeddingService {
this.chunkFigureRefRepository = chunkFigureRefRepository;
this.figureStorageService = figureStorageService;
this.markdownStorageService = markdownStorageService;
this.chunkEnrichmentPipeline = chunkEnrichmentPipeline;
this.chunkMetadataRepository = chunkMetadataRepository;
}
@Async
@@ -110,6 +118,14 @@ public class BookEmbeddingService {
} else {
embedInBatches(allChunks, bookId);
log.info("Embedded {} text chunks for book {}", allChunks.size(), bookId);
Map<String, SectionEntity> sectionsById = new HashMap<>();
for (SectionEntity s : sections) sectionsById.put(s.getId(), s);
try {
chunkEnrichmentPipeline.enrichAndPersist(allChunks, sectionsById, bookTitle);
} catch (Exception ex) {
log.warn("Chunk enrichment failed for book {} — backfill endpoint can recover: {}",
bookId, ex.getMessage());
}
}
// Step 4: Decode pre-cropped figures from Marker output
@@ -200,6 +216,8 @@ public class BookEmbeddingService {
sectionRepository.deleteAllByBookId(bookId);
chapterRepository.deleteAllByBookId(bookId);
chunkMetadataRepository.deleteByBookId(bookId);
FilterExpressionBuilder b = new FilterExpressionBuilder();
vectorStore.delete(b.eq("book_id", bookId.toString()).build());
} catch (Exception ex) {
@@ -0,0 +1,52 @@
package com.aiteacher.concept;
import com.aiteacher.topic.Topic;
import com.aiteacher.topic.TopicRepository;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.UUID;
import java.util.stream.Collectors;
/**
 * REST endpoints for generating, listing and reading the concept reports of a topic.
 *
 * <p>All routes live under {@code /api/v1/topics/{id}/concept-reports}. Every handler first
 * resolves the topic from the path and throws {@link NoSuchElementException} when it does
 * not exist.
 */
@RestController
@RequestMapping("/api/v1/topics/{id}/concept-reports")
public class ConceptReportController {

    private final TopicRepository topicRepository;
    private final ConceptReportService conceptReportService;

    public ConceptReportController(TopicRepository topicRepository,
                                   ConceptReportService conceptReportService) {
        this.topicRepository = topicRepository;
        this.conceptReportService = conceptReportService;
    }

    /**
     * Generates and persists a new concept report for the topic.
     *
     * @param id       topic id taken from the path
     * @param language report language code, {@code "en"} when the parameter is absent
     * @return the freshly generated report
     * @throws NoSuchElementException if the topic does not exist
     */
    @PostMapping
    public ResponseEntity<ConceptReportResponse> generate(
            @PathVariable String id,
            @RequestParam(defaultValue = "en") String language) {
        Topic topic = requireTopic(id);
        return ResponseEntity.ok(conceptReportService.generateReport(topic, language));
    }

    /**
     * Lists the saved reports of the topic.
     *
     * @param id topic id taken from the path
     * @throws NoSuchElementException if the topic does not exist
     */
    @GetMapping
    public ResponseEntity<List<SavedConceptReportItem>> list(@PathVariable String id) {
        requireTopic(id);
        return ResponseEntity.ok(conceptReportService.listReports(id));
    }

    /**
     * Loads one saved report of the topic.
     *
     * @param id       topic id taken from the path
     * @param reportId id of the persisted report
     * @throws NoSuchElementException if the topic or the report does not exist, or if the
     *                                report belongs to a different topic than the one in the path
     */
    @GetMapping("/{reportId}")
    public ResponseEntity<ConceptReportResponse> get(@PathVariable String id,
                                                     @PathVariable UUID reportId) {
        Topic topic = requireTopic(id);
        // Only the topic from the path can legitimately own the report, so there is no need
        // to load every topic just to resolve a single display name.
        Map<String, String> topicNames =
                java.util.Collections.singletonMap(topic.getId(), topic.getName());
        ConceptReportResponse report = conceptReportService.getReport(reportId, topicNames);
        // Do not serve a report through the URL of a topic it does not belong to.
        if (!id.equals(report.topicId())) {
            throw new NoSuchElementException("Concept report not found.");
        }
        return ResponseEntity.ok(report);
    }

    /** Resolves the topic or fails with the not-found exception shared by all handlers. */
    private Topic requireTopic(String id) {
        return topicRepository.findById(id)
                .orElseThrow(() -> new NoSuchElementException("Topic not found."));
    }
}
@@ -0,0 +1,48 @@
package com.aiteacher.concept;
import jakarta.persistence.*;
import java.time.Instant;
import java.util.UUID;
/**
 * JPA entity for one persisted concept report, stored in table {@code concept_report}.
 *
 * <p>The facet sections and the source list are kept as serialized JSON text columns.
 * The entity exposes getters only; all values are supplied through the constructor.
 */
@Entity
@Table(name = "concept_report")
public class ConceptReportEntity {

    /** Primary key, generated with the UUID strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.UUID)
    private UUID id;

    /** Id of the topic the report was generated for. */
    @Column(name = "topic_id", nullable = false, length = 100)
    private String topicId;

    /** Number of the report within its topic. */
    @Column(name = "report_number", nullable = false)
    private int reportNumber;

    /** JSON text holding the facet sections. */
    @Column(name = "facets_json", nullable = false, columnDefinition = "TEXT")
    private String facetsJson;

    /** JSON text holding the source references. */
    @Column(name = "sources_json", nullable = false, columnDefinition = "TEXT")
    private String sourcesJson;

    /** Generation timestamp. */
    @Column(name = "generated_at", nullable = false)
    private Instant generatedAt;

    /** No-argument constructor for the persistence provider. */
    protected ConceptReportEntity() {
    }

    /**
     * Creates a new, not yet persisted report.
     *
     * @param topicId      id of the owning topic
     * @param reportNumber number of the report within the topic
     * @param facetsJson   serialized facet sections
     * @param sourcesJson  serialized source references
     * @param generatedAt  generation timestamp
     */
    public ConceptReportEntity(String topicId,
                               int reportNumber,
                               String facetsJson,
                               String sourcesJson,
                               Instant generatedAt) {
        this.topicId = topicId;
        this.reportNumber = reportNumber;
        this.facetsJson = facetsJson;
        this.sourcesJson = sourcesJson;
        this.generatedAt = generatedAt;
    }

    public UUID getId() {
        return id;
    }

    public String getTopicId() {
        return topicId;
    }

    public int getReportNumber() {
        return reportNumber;
    }

    public String getFacetsJson() {
        return facetsJson;
    }

    public String getSourcesJson() {
        return sourcesJson;
    }

    public Instant getGeneratedAt() {
        return generatedAt;
    }
}
@@ -0,0 +1,13 @@
package com.aiteacher.concept;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.UUID;
/**
 * Spring Data repository for {@link ConceptReportEntity} rows keyed by UUID.
 * Both query methods declare no explicit query and rely on their method names.
 */
@Repository
public interface ConceptReportRepository extends JpaRepository<ConceptReportEntity, UUID> {
// Number of reports stored for the topic; ConceptReportService adds 1 to obtain the next report number.
long countByTopicId(String topicId);
// All reports of the topic ordered by ascending report number; used to list saved reports.
List<ConceptReportEntity> findByTopicIdOrderByReportNumberAsc(String topicId);
}
@@ -0,0 +1,24 @@
package com.aiteacher.concept;
import com.aiteacher.topic.TopicSummaryResponse.SourceReference;
import java.time.Instant;
import java.util.List;
import java.util.UUID;
/**
 * API payload describing one concept report. ConceptReportService builds it both right
 * after generation and when a persisted report is reloaded.
 *
 * @param id           id of the persisted report
 * @param reportNumber number of the report within its topic
 * @param topicId      id of the topic the report belongs to
 * @param topicName    display name of the topic; on reload the service falls back to the
 *                     topic id when no name is supplied for it
 * @param facets       rendered facet sections
 * @param sources      text and figure sources of the report
 * @param generatedAt  generation timestamp
 */
public record ConceptReportResponse(
UUID id,
int reportNumber,
String topicId,
String topicName,
List<FacetSection> facets,
List<SourceReference> sources,
Instant generatedAt
) {
/**
 * One facet-specific section of the report.
 *
 * @param facetKey  enum constant name of the facet
 * @param title     display title of the facet
 * @param markdown  section text returned by the LLM (empty string when it returned none)
 * @param refLabels source labels ("S#" / "F#") of the sections and figures available to this facet
 */
public record FacetSection(
String facetKey,
String title,
String markdown,
List<String> refLabels
) {}
}
@@ -0,0 +1,299 @@
package com.aiteacher.concept;
import com.aiteacher.book.Book;
import com.aiteacher.book.BookRepository;
import com.aiteacher.book.BookStatus;
import com.aiteacher.book.NoKnowledgeSourceException;
import com.aiteacher.document.FigureEntity;
import com.aiteacher.document.SectionEntity;
import com.aiteacher.enrichment.ConceptFacet;
import com.aiteacher.topic.Topic;
import com.aiteacher.topic.TopicSummaryResponse.SourceReference;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.stereotype.Service;
import java.time.Instant;
import java.util.*;
@Service
public class ConceptReportService {
private static final Logger log = LoggerFactory.getLogger(ConceptReportService.class);
private static final String SYSTEM_PROMPT = """
You are an expert neurosurgery educator. You write focused, facet-specific sections of
a structured concept report for highly experienced neurosurgeons. The audience wants
concise, clinically relevant teaching.
When writing a facet section:
- Stick strictly to the facet you are asked about (e.g. definition, complications).
- Cite claims using ONLY the reference labels provided in the context.
Do not invent page numbers, section titles, or labels not present in CONTEXT.
- Citation format: each citation must be a SINGLE label per bracket write `[S1], [S2]` or
`[S3] [F2]`. NEVER combine labels inside one bracket (no `[S1 S2]`, `[S1, S2]`, `[S1 2]`).
- Figures ([F#]) are actual images that will be rendered inline reference them when they
visually support your explanation.
- If CONTEXT is insufficient for the requested facet, write exactly:
"The uploaded books do not contain sufficient information on this aspect."
- Never hallucinate clinical information outside the provided context.
""";
private final ChatClient chatClient;
private final BookRepository bookRepository;
private final ConceptRetriever conceptRetriever;
private final ConceptReportRepository reportRepository;
private final ObjectMapper objectMapper;
public ConceptReportService(ChatClient chatClient,
BookRepository bookRepository,
ConceptRetriever conceptRetriever,
ConceptReportRepository reportRepository,
ObjectMapper objectMapper) {
this.chatClient = chatClient;
this.bookRepository = bookRepository;
this.conceptRetriever = conceptRetriever;
this.reportRepository = reportRepository;
this.objectMapper = objectMapper;
}
public ConceptReportResponse generateReport(Topic topic, String language) {
List<Book> readyBooks = bookRepository.findAll().stream()
.filter(b -> b.getStatus() == BookStatus.READY)
.toList();
if (readyBooks.isEmpty()) {
throw new NoKnowledgeSourceException(
"No books are available as knowledge sources. Please upload and process at least one book.");
}
Map<ConceptFacet, MergedFacet> merged = new EnumMap<>(ConceptFacet.class);
for (Book book : readyBooks) {
ConceptRetrievalResult result = conceptRetriever.retrieveByConcept(topic.getName(), book.getId());
result.byFacet().forEach((facet, bundle) -> merged
.computeIfAbsent(facet, k -> new MergedFacet())
.add(bundle));
}
// Global, deduplicated sources across all facets
List<SectionEntity> globalSections = new ArrayList<>();
Set<String> seenSections = new LinkedHashSet<>();
List<FigureEntity> globalFigures = new ArrayList<>();
Set<String> seenFigures = new LinkedHashSet<>();
for (MergedFacet mf : merged.values()) {
for (SectionEntity s : mf.sections) if (seenSections.add(s.getId())) globalSections.add(s);
for (FigureEntity f : mf.figures) if (seenFigures.add(f.getId())) globalFigures.add(f);
}
// Global label maps: section id -> "S#", figure id -> "F#"
Map<String, String> sectionLabel = new HashMap<>();
for (int i = 0; i < globalSections.size(); i++) {
sectionLabel.put(globalSections.get(i).getId(), "S" + (i + 1));
}
Map<String, String> figureLabel = new HashMap<>();
for (int i = 0; i < globalFigures.size(); i++) {
figureLabel.put(globalFigures.get(i).getId(), "F" + (i + 1));
}
List<ConceptReportResponse.FacetSection> facetSections = new ArrayList<>();
// Preserve enum declaration order for consistent UI rendering
for (ConceptFacet facet : ConceptFacet.values()) {
MergedFacet mf = merged.get(facet);
if (mf == null || mf.isEmpty()) continue;
if (facet == ConceptFacet.OTHER) continue; // skip OTHER bucket in the rendered report
String prompt = buildFacetPrompt(topic, facet, mf, sectionLabel, figureLabel, language);
String markdown = chatClient.prompt()
.system(SYSTEM_PROMPT)
.user(prompt)
.call()
.content();
List<String> refs = collectRefs(mf, sectionLabel, figureLabel);
facetSections.add(new ConceptReportResponse.FacetSection(
facet.name(), facet.displayTitle(), markdown != null ? markdown : "", refs));
}
List<SourceReference> sources = buildSources(globalSections, globalFigures, readyBooks);
Instant generatedAt = Instant.now();
int reportNumber = (int) reportRepository.countByTopicId(topic.getId()) + 1;
ConceptReportEntity entity = new ConceptReportEntity(
topic.getId(), reportNumber,
serialize(facetSections), serialize(sources), generatedAt);
entity = reportRepository.save(entity);
return new ConceptReportResponse(
entity.getId(), reportNumber, topic.getId(), topic.getName(),
facetSections, sources, generatedAt);
}
public List<SavedConceptReportItem> listReports(String topicId) {
return reportRepository.findByTopicIdOrderByReportNumberAsc(topicId).stream()
.map(e -> new SavedConceptReportItem(e.getId(), e.getReportNumber(), e.getGeneratedAt()))
.toList();
}
public ConceptReportResponse getReport(UUID reportId, Map<String, String> topicNamesById) {
ConceptReportEntity entity = reportRepository.findById(reportId)
.orElseThrow(() -> new NoSuchElementException("Concept report not found."));
List<ConceptReportResponse.FacetSection> facets = deserializeFacets(entity.getFacetsJson());
List<SourceReference> sources = deserializeSources(entity.getSourcesJson());
String topicName = topicNamesById.getOrDefault(entity.getTopicId(), entity.getTopicId());
return new ConceptReportResponse(
entity.getId(), entity.getReportNumber(), entity.getTopicId(), topicName,
facets, sources, entity.getGeneratedAt());
}
private String buildFacetPrompt(Topic topic, ConceptFacet facet, MergedFacet mf,
Map<String, String> sectionLabel,
Map<String, String> figureLabel,
String language) {
StringBuilder sb = new StringBuilder();
sb.append("CONCEPT: ").append(topic.getName()).append("\n");
sb.append("FACET: ").append(facet.displayTitle()).append("\n\n");
sb.append("CONTEXT:\n\n");
for (SectionEntity s : mf.sections) {
String label = sectionLabel.get(s.getId());
sb.append("[").append(label).append("] ")
.append(s.getTitle() != null ? s.getTitle() : "")
.append(", p.").append(s.getPageStart()).append("\n");
sb.append(s.getFullText()).append("\n\n");
}
if (!mf.figures.isEmpty()) {
sb.append("AVAILABLE FIGURES:\n");
for (FigureEntity f : mf.figures) {
String label = figureLabel.get(f.getId());
sb.append("[").append(label).append("] ")
.append(f.getLabel() != null ? f.getLabel() : "Figure")
.append(" (p.").append(f.getPage()).append("): ")
.append(f.getCaption() != null ? f.getCaption() : "")
.append("\n");
}
sb.append("\n");
}
sb.append("Write the ").append(facet.displayTitle()).append(" section of a concept report on \"")
.append(topic.getName())
.append("\". Stay strictly within this facet. Use the [S#]/[F#] labels above for citations.");
if ("th".equalsIgnoreCase(language)) {
sb.append("\n\nIMPORTANT: Write the narrative in Thai. ")
.append("Keep all medical, anatomical, surgical, pharmacological, and clinical ")
.append("terminology in English (e.g., cerebellopontine angle, glioblastoma, craniotomy, ")
.append("dexamethasone). Do NOT translate disease names, anatomical structures, drug names, ")
.append("procedures, eponyms, or imaging modalities. Translate only connective prose, ")
.append("explanations, and general descriptions. Citation labels [S#]/[F#] stay unchanged. ")
.append("The sentinel string for insufficient context must remain exactly: ")
.append("\"The uploaded books do not contain sufficient information on this aspect.\"");
}
return sb.toString();
}
private List<String> collectRefs(MergedFacet mf,
Map<String, String> sectionLabel,
Map<String, String> figureLabel) {
List<String> refs = new ArrayList<>();
for (SectionEntity s : mf.sections) {
String l = sectionLabel.get(s.getId());
if (l != null) refs.add(l);
}
for (FigureEntity f : mf.figures) {
String l = figureLabel.get(f.getId());
if (l != null) refs.add(l);
}
return refs;
}
private List<SourceReference> buildSources(List<SectionEntity> sections,
List<FigureEntity> figures,
List<Book> readyBooks) {
List<SourceReference> sources = new ArrayList<>();
for (int i = 0; i < sections.size(); i++) {
SectionEntity s = sections.get(i);
Book book = findBook(readyBooks, s.getBookId());
String title = book != null ? book.getTitle() : "Book";
String bookId = book != null ? book.getId().toString() : null;
sources.add(new SourceReference(
"TEXT", "S" + (i + 1), bookId, title, s.getPageStart(),
truncate(s.getFullText(), 500), null, null, null, null, null));
}
for (int i = 0; i < figures.size(); i++) {
FigureEntity f = figures.get(i);
Book book = findBook(readyBooks, f.getBookId());
String title = book != null ? book.getTitle() : "Book";
String bookId = book != null ? book.getId().toString() : null;
String filename = f.getImagePath().substring(f.getImagePath().lastIndexOf('/') + 1);
String imageUrl = "/api/v1/figures/" + f.getBookId() + "/" + filename;
sources.add(new SourceReference(
"FIGURE", "F" + (i + 1), bookId, title, f.getPage(),
null, f.getId(), f.getLabel(), f.getCaption(),
f.getFigureType().name(), imageUrl));
}
return sources;
}
private Book findBook(List<Book> books, UUID bookId) {
return books.stream().filter(b -> b.getId().equals(bookId)).findFirst().orElse(null);
}
private String serialize(Object value) {
try {
return objectMapper.writeValueAsString(value);
} catch (JsonProcessingException e) {
log.warn("Failed to serialize concept report field", e);
return "[]";
}
}
private List<ConceptReportResponse.FacetSection> deserializeFacets(String json) {
try {
return objectMapper.readValue(json,
objectMapper.getTypeFactory().constructCollectionType(
List.class, ConceptReportResponse.FacetSection.class));
} catch (JsonProcessingException e) {
log.warn("Failed to deserialize facets", e);
return List.of();
}
}
private List<SourceReference> deserializeSources(String json) {
try {
return objectMapper.readValue(json,
objectMapper.getTypeFactory().constructCollectionType(
List.class, SourceReference.class));
} catch (JsonProcessingException e) {
log.warn("Failed to deserialize sources", e);
return List.of();
}
}
private String truncate(String text, int maxChars) {
if (text == null) return "";
return text.length() <= maxChars ? text : text.substring(0, maxChars) + "";
}
private static class MergedFacet {
final List<SectionEntity> sections = new ArrayList<>();
final List<FigureEntity> figures = new ArrayList<>();
final Set<String> sectionIds = new HashSet<>();
final Set<String> figureIds = new HashSet<>();
void add(FacetBundle bundle) {
for (SectionEntity s : bundle.sections()) {
if (sectionIds.add(s.getId())) sections.add(s);
}
for (FigureEntity f : bundle.figures()) {
if (figureIds.add(f.getId())) figures.add(f);
}
}
boolean isEmpty() { return sections.isEmpty() && figures.isEmpty(); }
}
}
@@ -0,0 +1,10 @@
package com.aiteacher.concept;
import com.aiteacher.enrichment.ConceptFacet;
import java.util.Map;
/**
 * Outcome of a concept lookup in one book.
 *
 * @param byFacet      hydrated bundle per facet; empty when nothing was found
 * @param usedFallback true when the exact entity match returned no rows and the vector
 *                     similarity search was tried instead
 */
public record ConceptRetrievalResult(
Map<ConceptFacet, FacetBundle> byFacet,
boolean usedFallback
) {}
@@ -0,0 +1,163 @@
package com.aiteacher.concept;
import com.aiteacher.document.*;
import com.aiteacher.enrichment.ChunkMetadataEntity;
import com.aiteacher.enrichment.ChunkMetadataRepository;
import com.aiteacher.enrichment.ConceptFacet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Gathers every chunk of a book that concerns a concept and buckets the hydrated
 * sections and figures by facet.
 *
 * <p>Lookup order: exact entity match on the canonicalised keyword, exact entity match on
 * the keyword as typed (lower-cased), then a vector similarity search as last resort.
 */
@Service
public class ConceptRetriever {

    private static final Logger log = LoggerFactory.getLogger(ConceptRetriever.class);
    private static final int FALLBACK_TOP_K = 30;
    private static final int FIGURE_TOP_K = 6;

    /** Plural endings whose singular is obtained by dropping the trailing "es". */
    private static final List<String> ES_PLURAL_SUFFIXES =
            List.of("sses", "uses", "xes", "zes", "ches", "shes");

    private final ChunkMetadataRepository metadataRepository;
    private final VectorStore vectorStore;
    private final SectionRepository sectionRepository;
    private final FigureRepository figureRepository;
    private final ChunkFigureRefRepository chunkFigureRefRepository;

    public ConceptRetriever(ChunkMetadataRepository metadataRepository,
                            VectorStore vectorStore,
                            SectionRepository sectionRepository,
                            FigureRepository figureRepository,
                            ChunkFigureRefRepository chunkFigureRefRepository) {
        this.metadataRepository = metadataRepository;
        this.vectorStore = vectorStore;
        this.sectionRepository = sectionRepository;
        this.figureRepository = figureRepository;
        this.chunkFigureRefRepository = chunkFigureRefRepository;
    }

    /**
     * Retrieves the chunks of one book that concern the concept, grouped by facet.
     *
     * @param conceptKeyword concept as typed by the user (for example the topic name)
     * @param bookId         book to search
     * @return facet bundles plus a flag telling whether the vector fallback was used;
     *         the map is empty when nothing was found or the keyword is blank
     */
    public ConceptRetrievalResult retrieveByConcept(String conceptKeyword, UUID bookId) {
        String canonical = canonicalise(conceptKeyword);
        if (canonical.isBlank()) {
            // Nothing to match on; avoid an entity query and an embedding call for "".
            return new ConceptRetrievalResult(Map.of(), false);
        }

        List<ChunkMetadataEntity> hits = metadataRepository
                .findByBookIdAndEntityContains(bookId, canonical);
        if (hits.isEmpty()) {
            // The singularisation is heuristic; retry with the keyword exactly as typed
            // before paying for a vector search.
            String literal = conceptKeyword.trim().toLowerCase(Locale.ROOT);
            if (!literal.equals(canonical)) {
                hits = metadataRepository.findByBookIdAndEntityContains(bookId, literal);
            }
        }

        boolean fallback = false;
        if (hits.isEmpty()) {
            log.debug("Entity match miss for '{}' in book {} — falling back to vector search", canonical, bookId);
            fallback = true;
            hits = vectorFallback(conceptKeyword, bookId);
        }
        if (hits.isEmpty()) {
            return new ConceptRetrievalResult(Map.of(), fallback);
        }

        List<FigureEntity> semanticFigures = semanticFigureSearch(conceptKeyword, bookId);

        // Group in encounter order; a row without a facet goes to OTHER instead of
        // failing the whole retrieval.
        Map<ConceptFacet, List<ChunkMetadataEntity>> grouped = new LinkedHashMap<>();
        for (ChunkMetadataEntity hit : hits) {
            ConceptFacet facet = hit.getFacet() != null ? hit.getFacet() : ConceptFacet.OTHER;
            grouped.computeIfAbsent(facet, k -> new ArrayList<>()).add(hit);
        }

        Map<ConceptFacet, FacetBundle> result = new LinkedHashMap<>();
        for (Map.Entry<ConceptFacet, List<ChunkMetadataEntity>> entry : grouped.entrySet()) {
            result.put(entry.getKey(), hydrate(entry.getValue(), semanticFigures));
        }
        return new ConceptRetrievalResult(result, fallback);
    }

    /** Finds the metadata rows of the TEXT chunks most similar to the query within the book. */
    private List<ChunkMetadataEntity> vectorFallback(String query, UUID bookId) {
        FilterExpressionBuilder b = new FilterExpressionBuilder();
        List<Document> textHits = vectorStore.similaritySearch(
                SearchRequest.builder()
                        .query(query)
                        .topK(FALLBACK_TOP_K)
                        .filterExpression(b.and(
                                b.eq("type", "TEXT"),
                                b.eq("book_id", bookId.toString())
                        ).build())
                        .build()
        );
        List<UUID> chunkIds = new ArrayList<>();
        for (Document doc : textHits) {
            try {
                chunkIds.add(UUID.fromString(doc.getId()));
            } catch (Exception e) {
                // Documents whose id is not a UUID have no metadata row; ignore them.
            }
        }
        if (chunkIds.isEmpty()) return List.of();
        return metadataRepository.findByChunkIdIn(chunkIds);
    }

    /**
     * Loads the sections and figures behind the given chunks and merges in the
     * caption-search figures (chunk-linked figures first, deduplicated by id).
     */
    private FacetBundle hydrate(List<ChunkMetadataEntity> chunks, List<FigureEntity> semanticFigures) {
        List<String> sectionIds = chunks.stream()
                .map(ChunkMetadataEntity::getSectionId)
                .filter(Objects::nonNull) // findAllById rejects null ids
                .distinct()
                .toList();
        List<SectionEntity> sections = sectionIds.isEmpty()
                ? List.of()
                : sectionRepository.findAllById(sectionIds);

        List<UUID> chunkIds = chunks.stream().map(ChunkMetadataEntity::getChunkId).toList();
        List<String> linkedFigureIds = chunkFigureRefRepository.findByChunkIdIn(chunkIds)
                .stream()
                .map(ChunkFigureRefEntity::getFigureId)
                .filter(Objects::nonNull)
                .distinct()
                .toList();
        List<FigureEntity> linkedFigures = linkedFigureIds.isEmpty()
                ? List.of()
                : figureRepository.findAllById(linkedFigureIds);

        Map<String, FigureEntity> merged = new LinkedHashMap<>();
        linkedFigures.forEach(f -> merged.put(f.getId(), f));
        semanticFigures.forEach(f -> merged.putIfAbsent(f.getId(), f));

        List<String> summaries = chunks.stream()
                .map(ChunkMetadataEntity::getSummary)
                .filter(s -> s != null && !s.isBlank())
                .distinct()
                .toList();
        return new FacetBundle(sections, new ArrayList<>(merged.values()), summaries);
    }

    /** Finds the figures of the book whose FIGURE documents are most similar to the query. */
    private List<FigureEntity> semanticFigureSearch(String query, UUID bookId) {
        FilterExpressionBuilder b = new FilterExpressionBuilder();
        List<Document> figureHits = vectorStore.similaritySearch(
                SearchRequest.builder()
                        .query(query)
                        .topK(FIGURE_TOP_K)
                        .filterExpression(b.and(
                                b.eq("type", "FIGURE"),
                                b.eq("book_id", bookId.toString())
                        ).build())
                        .build()
        );
        List<String> figureIds = new ArrayList<>();
        for (Document doc : figureHits) {
            // Metadata values are untyped; convert instead of casting.
            Object figureId = doc.getMetadata().get("figure_id");
            if (figureId != null) figureIds.add(figureId.toString());
        }
        return figureIds.isEmpty() ? List.of() : figureRepository.findAllById(figureIds);
    }

    /**
     * Normalises a user keyword to the lowercase, singular form used for entity matching.
     *
     * <p>Rules, applied to the trimmed, lower-cased input (only the last word is affected):
     * <ul>
     *   <li>{@code -ies} becomes {@code -y} ("arteries" to "artery");</li>
     *   <li>{@code -sses, -uses, -xes, -zes, -ches, -shes} drop "es"
     *       ("abscesses" to "abscess", "sinuses" to "sinus");</li>
     *   <li>any other trailing {@code s} is dropped ("aneurysms" to "aneurysm",
     *       "seizures" to "seizure"), except after {@code ss}, {@code us} or {@code is},
     *       which mark singular terms such as "abscess", "hydrocephalus", "stenosis".</li>
     * </ul>
     *
     * @param raw keyword as typed; may be null
     * @return the normalised keyword, or "" for null input
     */
    static String canonicalise(String raw) {
        if (raw == null) return "";
        String s = raw.trim().toLowerCase(Locale.ROOT);
        int n = s.length();
        if (n > 3 && s.endsWith("ies")) {
            return s.substring(0, n - 3) + "y";
        }
        for (String suffix : ES_PLURAL_SUFFIXES) {
            if (n > suffix.length() && s.endsWith(suffix)) {
                return s.substring(0, n - 2);
            }
        }
        boolean singularEnding = s.endsWith("ss") || s.endsWith("us") || s.endsWith("is");
        if (n > 1 && s.endsWith("s") && !singularEnding) {
            return s.substring(0, n - 1);
        }
        return s;
    }
}
@@ -0,0 +1,12 @@
package com.aiteacher.concept;
import com.aiteacher.document.FigureEntity;
import com.aiteacher.document.SectionEntity;
import java.util.List;
/**
 * Hydrated retrieval result for one facet, as assembled by ConceptRetriever.
 *
 * @param sections       sections loaded for the section ids of the facet's chunks
 * @param figures        figures linked to the facet's chunks followed by the figures found
 *                       through the caption similarity search, deduplicated by id
 * @param chunkSummaries distinct non-blank summaries of the facet's chunks
 */
public record FacetBundle(
List<SectionEntity> sections,
List<FigureEntity> figures,
List<String> chunkSummaries
) {}
@@ -0,0 +1,10 @@
package com.aiteacher.concept;
import java.time.Instant;
import java.util.UUID;
/**
 * List entry describing one saved concept report without its content.
 *
 * @param id           id of the persisted report
 * @param reportNumber number of the report within its topic
 * @param generatedAt  generation timestamp
 */
public record SavedConceptReportItem(
UUID id,
int reportNumber,
Instant generatedAt
) {}
@@ -5,6 +5,8 @@ import org.springframework.aot.hint.RuntimeHints;
import org.springframework.aot.hint.RuntimeHintsRegistrar;
import org.springframework.aot.hint.TypeReference;
import java.util.List;
/**
* GraalVM native-image runtime hints for third-party libraries that use reflection
* or classpath resource scanning not covered by Spring Boot's AOT processor.
@@ -64,6 +66,20 @@ public class NativeHintsConfig implements RuntimeHintsRegistrar {
software.amazon.awssdk.services.s3.S3Client.class,
MemberCategory.INVOKE_PUBLIC_METHODS
);
// Jackson deserialization of records persisted as JSON in DB columns.
// These are reached only via ObjectMapper.readValue in services, so Spring's
// BindingReflectionHintsRegistrar does not auto-discover all accessors.
for (Class<?> type : List.of(
com.aiteacher.topic.TopicSummaryResponse.class,
com.aiteacher.topic.TopicSummaryResponse.SourceReference.class,
com.aiteacher.concept.ConceptReportResponse.class,
com.aiteacher.concept.ConceptReportResponse.FacetSection.class
)) {
hints.reflection().registerType(type,
MemberCategory.INVOKE_DECLARED_CONSTRUCTORS,
MemberCategory.INVOKE_DECLARED_METHODS);
}
}
private void registerJBossLogger(RuntimeHints hints, String className) {
@@ -0,0 +1,75 @@
package com.aiteacher.enrichment;
import com.aiteacher.document.SectionEntity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.stereotype.Service;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.UUID;
/**
 * Runs the LLM enrichment over a list of text chunks and stores one
 * {@link ChunkMetadataEntity} per successfully enriched chunk.
 *
 * <p>The pipeline is best-effort: a chunk that cannot be identified or enriched is logged
 * and left out, and processing continues with the next chunk.
 */
@Service
public class ChunkEnrichmentPipeline {

    private static final Logger log = LoggerFactory.getLogger(ChunkEnrichmentPipeline.class);

    /** Number of attempted chunks between two progress log lines. */
    private static final int PROGRESS_LOG_INTERVAL = 25;

    private final ChunkEnrichmentService enrichmentService;
    private final ChunkMetadataRepository metadataRepository;

    public ChunkEnrichmentPipeline(ChunkEnrichmentService enrichmentService,
                                   ChunkMetadataRepository metadataRepository) {
        this.enrichmentService = enrichmentService;
        this.metadataRepository = metadataRepository;
    }

    /**
     * Enriches every chunk and persists the resulting metadata row.
     *
     * @param chunks       chunks to enrich; each needs a UUID id and {@code book_id} /
     *                     {@code section_id} metadata, otherwise it is skipped
     * @param sectionsById sections by id, used to pass the chunk's section to the enricher
     *                     (null is passed when the section is unknown)
     * @param bookTitle    title of the book the chunks belong to
     */
    public void enrichAndPersist(List<Document> chunks,
                                 Map<String, SectionEntity> sectionsById,
                                 String bookTitle) {
        int total = chunks.size();
        int attempted = 0;
        int enriched = 0;
        int skipped = 0;

        for (Document chunk : chunks) {
            UUID chunkId = parseUuid(chunk.getId());
            if (chunkId == null) {
                log.warn("Skipping chunk with non-UUID id '{}'", chunk.getId());
                skipped++;
                continue;
            }

            String sectionId = (String) chunk.getMetadata().get("section_id");
            UUID bookId = parseUuid(chunk.getMetadata().get("book_id"));
            if (bookId == null || sectionId == null) {
                log.warn("Skipping chunk {} missing book_id or section_id metadata", chunkId);
                skipped++;
                continue;
            }
            SectionEntity section = sectionsById.get(sectionId);

            try {
                ChunkEnrichmentResult result = enrichmentService.enrich(chunk.getText(), section, bookTitle);
                ChunkMetadataEntity entity = new ChunkMetadataEntity(
                        chunkId, bookId, sectionId,
                        result.facet(), result.entities(), result.summary(),
                        ChunkEnrichmentService.MODEL_VERSION, Instant.now());
                metadataRepository.save(entity);
                // Count only chunks whose metadata row was actually written, so the final
                // summary does not report failed chunks as enriched.
                enriched++;
            } catch (Exception ex) {
                log.warn("Enrichment failed for chunk {}: {}", chunkId, ex.getMessage());
            }

            attempted++;
            if (attempted % PROGRESS_LOG_INTERVAL == 0) {
                log.info("Enrichment progress: {}/{} chunks", attempted, total);
            }
        }

        log.info("Enrichment complete: {}/{} chunks enriched", enriched, total);
        int failed = attempted - enriched;
        if (failed > 0 || skipped > 0) {
            log.warn("Enrichment left {} chunks failed and {} chunks skipped", failed, skipped);
        }
    }

    /** Parses a UUID from an arbitrary value; returns null for null or malformed input. */
    private UUID parseUuid(Object raw) {
        if (raw == null) return null;
        try {
            return UUID.fromString(raw.toString());
        } catch (IllegalArgumentException ex) {
            return null;
        }
    }
}
@@ -0,0 +1,9 @@
package com.aiteacher.enrichment;
import java.util.List;
/**
 * Result of enriching one chunk; ChunkEnrichmentPipeline copies the three values into a
 * ChunkMetadataEntity.
 *
 * @param entities concepts the chunk is about
 * @param facet    the single facet assigned to the chunk
 * @param summary  summary of the chunk
 */
public record ChunkEnrichmentResult(
List<String> entities,
ConceptFacet facet,
String summary
) {}
@@ -0,0 +1,135 @@
package com.aiteacher.enrichment;
import com.aiteacher.document.SectionEntity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
@Service
public class ChunkEnrichmentService {
public static final String MODEL_VERSION = "v1";
private static final int MAX_ENTITIES = 8;
private static final Logger log = LoggerFactory.getLogger(ChunkEnrichmentService.class);
private static final String SYSTEM_PROMPT = """
You are a medical indexing assistant that classifies neurosurgery textbook excerpts.
For each excerpt you receive, extract three fields:
- entities: the medical concepts, conditions, procedures, tools, or anatomical
structures the excerpt is ABOUT. Normalise each to lowercase, singular canonical
English form. Expand abbreviations (e.g. "SAH" -> "subarachnoid hemorrhage").
Avoid generic words ("patient", "technique"). Cap at %d entities.
- facet: exactly one of the following. Pick the SINGLE best fit based on the
excerpt's PRIMARY teaching purpose. Use OTHER only when nothing else applies.
DEFINITION defines the entity / syndrome / concept ("what is X").
ANATOMY neuroanatomy, vascular/tract relationships, operative
landmarks, anatomical variants.
PATHOPHYSIOLOGY mechanism of disease, etiology, natural history,
molecular/cellular basis.
EPIDEMIOLOGY incidence, prevalence, demographics, risk factors.
CLINICAL_PRESENTATION symptoms, signs, neurological exam findings, syndromes
as they present in patients.
IMAGING CT / MRI / angiography / DSA / ultrasound features and
interpretation. If the excerpt describes HOW something
looks on imaging, use IMAGING.
CLASSIFICATION named grading scales, staging systems, subtype
taxonomies (Hunt-Hess, WFNS, Fisher, Spetzler-Martin,
GCS, Karnofsky, mRS, Simpson, etc.). If the excerpt
defines or applies a named scale, use CLASSIFICATION
even if it is grounded in imaging or clinical exam.
INDICATIONS when to operate / treat / observe; patient selection
criteria; contraindications.
SURGICAL_TECHNIQUE operative approach, positioning, steps, landmarks,
instruments, implants, intraoperative monitoring.
NONSURGICAL_MANAGEMENT medical therapy, endovascular treatment, stereotactic
radiosurgery, conservative / observational management.
COMPLICATIONS intra- or postoperative complications, adverse events.
OUTCOMES_FOLLOWUP prognosis, morbidity/mortality rates, recurrence,
surveillance schedules, follow-up care.
OTHER history, philosophy, ethics, or anything not covered.
Disambiguation rules:
* A named grading scale => CLASSIFICATION (even when grounded in imaging/exam).
* Tools and implants described as part of an operation => SURGICAL_TECHNIQUE,
not a standalone facet.
* Illustrative case reports => CLINICAL_PRESENTATION.
* Imaging findings of complications => COMPLICATIONS, not IMAGING.
- summary: one or two sentences describing what the excerpt teaches.
Respond with the structured JSON requested. Do not fabricate content not present in
the excerpt.
""".formatted(MAX_ENTITIES);
private final ChatClient chatClient;
/**
 * Creates the service.
 *
 * @param chatClient chat client used to issue the enrichment prompt
 */
public ChunkEnrichmentService(ChatClient chatClient) {
this.chatClient = chatClient;
}
/**
 * Asks the LLM to tag one excerpt and converts its structured reply into a
 * {@link ChunkEnrichmentResult}.
 *
 * <p>A {@code null} reply yields an empty result with facet {@code OTHER}. Otherwise
 * the entities are normalised, the facet string is parsed leniently, and the summary
 * is stripped (a missing summary becomes the empty string).
 *
 * @param chunkText excerpt to classify
 * @param section   section the excerpt belongs to; may be {@code null}
 * @param bookTitle title of the book the excerpt comes from
 * @return the enrichment result, never {@code null}
 */
public ChunkEnrichmentResult enrich(String chunkText, SectionEntity section, String bookTitle) {
    String userPrompt = buildUserPrompt(chunkText, section, bookTitle);
    LlmOutput llmOutput = chatClient.prompt()
            .system(SYSTEM_PROMPT)
            .user(userPrompt)
            .call()
            .entity(LlmOutput.class);

    if (llmOutput == null) {
        log.warn("LLM returned null enrichment; defaulting to OTHER");
        return new ChunkEnrichmentResult(List.of(), ConceptFacet.OTHER, "");
    }

    List<String> cleanedEntities = normaliseEntities(llmOutput.entities());
    ConceptFacet parsedFacet = parseFacet(llmOutput.facet());
    String rawSummary = llmOutput.summary();
    String cleanedSummary = rawSummary == null ? "" : rawSummary.strip();
    return new ChunkEnrichmentResult(cleanedEntities, parsedFacet, cleanedSummary);
}
/**
 * Builds the user message sent with the system prompt: book title, section title and
 * the excerpt itself, delimited by {@code ---} lines.
 *
 * @param chunkText excerpt to classify
 * @param section   owning section; when it or its title is {@code null} the SECTION
 *                  line is left empty
 * @param bookTitle book title, inserted as-is (no null check is applied here)
 * @return the formatted user prompt
 */
private String buildUserPrompt(String chunkText, SectionEntity section, String bookTitle) {
String sectionTitle = section != null && section.getTitle() != null ? section.getTitle() : "";
return """
BOOK: %s
SECTION: %s
EXCERPT:
---
%s
---
""".formatted(bookTitle, sectionTitle, chunkText);
}
/**
 * Cleans the entity list returned by the LLM.
 *
 * <p>Null entries are dropped, each remaining value is trimmed and lower-cased with
 * {@link Locale#ROOT}, blanks and duplicates are removed (first occurrence wins), and
 * the result is capped at {@code MAX_ENTITIES} items in their original order.
 *
 * @param raw entities as returned by the model; may be {@code null}
 * @return the normalised entities; an empty list when {@code raw} is {@code null}
 */
private List<String> normaliseEntities(List<String> raw) {
    if (raw == null) {
        return List.of();
    }
    List<String> normalised = raw.stream()
            .filter(candidate -> candidate != null)
            .map(candidate -> candidate.trim().toLowerCase(Locale.ROOT))
            .filter(candidate -> !candidate.isEmpty())
            .distinct()
            .limit(MAX_ENTITIES)
            .collect(ArrayList::new, ArrayList::add, ArrayList::addAll);
    return normalised;
}
/**
 * Converts the facet string returned by the LLM into a {@link ConceptFacet}.
 *
 * <p>The value is trimmed and upper-cased before lookup. A {@code null} or unknown
 * value falls back to {@link ConceptFacet#OTHER}; unknown values are logged.
 *
 * @param raw facet name as returned by the model; may be {@code null}
 * @return the matching facet, or {@code OTHER}
 */
private ConceptFacet parseFacet(String raw) {
    if (raw != null) {
        String candidate = raw.trim().toUpperCase(Locale.ROOT);
        try {
            return ConceptFacet.valueOf(candidate);
        } catch (IllegalArgumentException unknownFacet) {
            log.warn("LLM returned unknown facet '{}', defaulting to OTHER", raw);
        }
    }
    return ConceptFacet.OTHER;
}
// DTO for Spring AI structured output; facet is read as String so we can defend against bad values
public record LlmOutput(List<String> entities, String facet, String summary) {}
}
@@ -0,0 +1,71 @@
package com.aiteacher.enrichment;
import jakarta.persistence.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import java.time.Instant;
import java.util.List;
import java.util.UUID;
/**
 * JPA mapping of one {@code chunk_metadata} row: the facet, entities and summary
 * recorded for a single text chunk, keyed by the chunk's id.
 */
@Entity
@Table(name = "chunk_metadata")
@org.hibernate.annotations.Check(
        name = "chunk_metadata_facet_check",
        constraints = "facet IN ('DEFINITION','ANATOMY','PATHOPHYSIOLOGY','EPIDEMIOLOGY'," +
                "'CLINICAL_PRESENTATION','IMAGING','CLASSIFICATION','INDICATIONS'," +
                "'SURGICAL_TECHNIQUE','NONSURGICAL_MANAGEMENT','COMPLICATIONS'," +
                "'OUTCOMES_FOLLOWUP','OTHER')")
public class ChunkMetadataEntity {

    /** Primary key; the id of the chunk this row describes. */
    @Id
    @Column(name = "chunk_id", nullable = false)
    private UUID chunkId;

    /** Book the chunk belongs to. */
    @Column(name = "book_id", nullable = false)
    private UUID bookId;

    /** Identifier of the section the chunk was cut from. */
    @Column(name = "section_id", nullable = false, length = 200)
    private String sectionId;

    /** Facet the chunk was classified under; stored by enum name. */
    @Enumerated(EnumType.STRING)
    @Column(name = "facet", nullable = false, length = 32)
    private ConceptFacet facet;

    /** Entities the chunk is about; stored as a JSONB array. */
    @JdbcTypeCode(SqlTypes.JSON)
    @Column(name = "entities", nullable = false, columnDefinition = "jsonb")
    private List<String> entities;

    /** Short description of what the chunk teaches. */
    @Column(name = "summary", nullable = false, columnDefinition = "TEXT")
    private String summary;

    /** Version tag of the enrichment that produced this row. */
    @Column(name = "model_version", nullable = false, length = 32)
    private String modelVersion;

    /** Moment the row was produced. */
    @Column(name = "enriched_at", nullable = false)
    private Instant enrichedAt;

    /** No-arg constructor for the persistence provider. */
    protected ChunkMetadataEntity() {
    }

    /** Creates a fully populated row; arguments are stored as given. */
    public ChunkMetadataEntity(UUID chunkId, UUID bookId, String sectionId,
                               ConceptFacet facet, List<String> entities, String summary,
                               String modelVersion, Instant enrichedAt) {
        this.chunkId = chunkId;
        this.bookId = bookId;
        this.sectionId = sectionId;
        this.facet = facet;
        this.entities = entities;
        this.summary = summary;
        this.modelVersion = modelVersion;
        this.enrichedAt = enrichedAt;
    }

    public UUID getChunkId() {
        return chunkId;
    }

    public UUID getBookId() {
        return bookId;
    }

    public String getSectionId() {
        return sectionId;
    }

    public ConceptFacet getFacet() {
        return facet;
    }

    public List<String> getEntities() {
        return entities;
    }

    public String getSummary() {
        return summary;
    }

    public String getModelVersion() {
        return modelVersion;
    }

    public Instant getEnrichedAt() {
        return enrichedAt;
    }
}
@@ -0,0 +1,36 @@
package com.aiteacher.enrichment;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import org.springframework.transaction.annotation.Transactional;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
/**
 * Spring Data repository for {@link ChunkMetadataEntity} rows, keyed by chunk id.
 * Entity lookups use the JSONB containment operator ({@code @>}) on the
 * {@code entities} column.
 */
@Repository
public interface ChunkMetadataRepository extends JpaRepository<ChunkMetadataEntity, UUID> {
/** Counts the metadata rows stored for one book. */
long countByBookId(UUID bookId);
/**
 * Finds the rows of one book whose {@code entities} array contains the given string.
 * The match is exact: {@code entity} is compared as a whole JSON string value, so
 * callers are expected to pass the canonical form that was stored.
 */
@Query(value = """
SELECT * FROM chunk_metadata
WHERE book_id = :bookId
AND entities @> to_jsonb(CAST(:entity AS text))
""", nativeQuery = true)
List<ChunkMetadataEntity> findByBookIdAndEntityContains(@Param("bookId") UUID bookId,
@Param("entity") String entity);
/** Same containment match as above, but across all books. */
@Query(value = """
SELECT * FROM chunk_metadata
WHERE entities @> to_jsonb(CAST(:entity AS text))
""", nativeQuery = true)
List<ChunkMetadataEntity> findByEntityContains(@Param("entity") String entity);
/** Loads the rows for the given chunk ids. */
List<ChunkMetadataEntity> findByChunkIdIn(Collection<UUID> chunkIds);
/** Deletes every metadata row of one book inside a transaction. */
@Transactional
void deleteByBookId(UUID bookId);
}
@@ -0,0 +1,27 @@
package com.aiteacher.enrichment;
/**
 * Facets a text chunk can be classified under. Each constant carries the
 * human-readable heading returned by {@link #displayTitle()}.
 */
public enum ConceptFacet {

    DEFINITION("Definition & Overview"),
    ANATOMY("Anatomy"),
    PATHOPHYSIOLOGY("Pathophysiology"),
    EPIDEMIOLOGY("Epidemiology"),
    CLINICAL_PRESENTATION("Clinical Presentation"),
    IMAGING("Imaging"),
    CLASSIFICATION("Classification & Grading"),
    INDICATIONS("Indications & Patient Selection"),
    SURGICAL_TECHNIQUE("Surgical Technique"),
    NONSURGICAL_MANAGEMENT("Non-surgical Management"),
    COMPLICATIONS("Complications"),
    OUTCOMES_FOLLOWUP("Outcomes & Follow-up"),
    OTHER("Other");

    /** Heading text associated with this facet. */
    private final String title;

    ConceptFacet(String title) {
        this.title = title;
    }

    /**
     * @return the human-readable heading for this facet
     */
    public String displayTitle() {
        return this.title;
    }
}
@@ -0,0 +1,138 @@
package com.aiteacher.enrichment;
import com.aiteacher.document.SectionEntity;
import com.aiteacher.document.SectionRepository;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.time.Instant;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
@Service
public class EnrichmentBackfillService {
private static final Logger log = LoggerFactory.getLogger(EnrichmentBackfillService.class);
private final JdbcTemplate jdbcTemplate;
private final ChunkEnrichmentService enrichmentService;
private final ChunkMetadataRepository metadataRepository;
private final SectionRepository sectionRepository;
private final ObjectMapper objectMapper;
private final Map<UUID, BackfillProgress> progressByBook = new ConcurrentHashMap<>();
/**
 * Creates the backfill service.
 *
 * @param jdbcTemplate       used for the direct queries against {@code vector_store}
 * @param enrichmentService  performs the per-chunk LLM enrichment
 * @param metadataRepository stores and counts {@code chunk_metadata} rows
 * @param sectionRepository  resolves the section referenced by a chunk's metadata
 * @param objectMapper       parses the metadata JSON read from {@code vector_store}
 */
public EnrichmentBackfillService(JdbcTemplate jdbcTemplate,
ChunkEnrichmentService enrichmentService,
ChunkMetadataRepository metadataRepository,
SectionRepository sectionRepository,
ObjectMapper objectMapper) {
this.jdbcTemplate = jdbcTemplate;
this.enrichmentService = enrichmentService;
this.metadataRepository = metadataRepository;
this.sectionRepository = sectionRepository;
this.objectMapper = objectMapper;
}
/**
 * Returns the in-memory progress recorded for a book.
 *
 * @param bookId book to look up
 * @return the last recorded progress, or an {@code IDLE} snapshot when none is recorded
 */
public BackfillProgress getProgress(UUID bookId) {
    BackfillProgress recorded = progressByBook.get(bookId);
    if (recorded == null) {
        return BackfillProgress.idle();
    }
    return recorded;
}
/**
 * Enriches every text chunk of a book that has no {@code chunk_metadata} row yet and
 * publishes progress in {@code progressByBook}.
 *
 * <p>Behaviour:
 * <ul>
 *   <li>{@code chunksEnriched} counts only chunks whose metadata row was actually
 *       saved; failed chunks are logged, counted separately and skipped.</li>
 *   <li>If the pending chunks cannot be listed at all, the progress is set to
 *       {@code FAILED} with the error message instead of being left stale.</li>
 *   <li>Chunks that fail stay without a metadata row, so a later run picks them up
 *       again.</li>
 * </ul>
 *
 * @param bookId    book whose chunks are enriched
 * @param bookTitle title passed to the enrichment prompt
 */
@Async
public void backfillBook(UUID bookId, String bookTitle) {
    List<Document> pending;
    try {
        pending = listUnenrichedChunks(bookId);
    } catch (RuntimeException ex) {
        // The method returns void and is annotated @Async, so the caller cannot observe
        // this exception; surface it through the progress map instead.
        log.error("Backfill could not list pending chunks for book {}", bookId, ex);
        progressByBook.put(bookId, new BackfillProgress("FAILED", 0, 0, ex.getMessage()));
        return;
    }

    int total = pending.size();
    progressByBook.put(bookId, new BackfillProgress("RUNNING", total, 0, null));
    log.info("Backfill starting for book {} — {} chunks pending", bookId, total);

    int enriched = 0;
    int failed = 0;
    // HashMap accepts null values, so sections that do not exist are cached too and
    // are not looked up again for every chunk that references them.
    Map<String, SectionEntity> sectionCache = new HashMap<>();
    for (Document chunk : pending) {
        try {
            // Parse the id first: a malformed id must not cost an LLM call.
            UUID chunkId = UUID.fromString(chunk.getId());
            String sectionId = (String) chunk.getMetadata().get("section_id");
            SectionEntity section = null;
            if (sectionId != null) {
                if (!sectionCache.containsKey(sectionId)) {
                    sectionCache.put(sectionId, sectionRepository.findById(sectionId).orElse(null));
                }
                section = sectionCache.get(sectionId);
            }
            ChunkEnrichmentResult result = enrichmentService.enrich(chunk.getText(), section, bookTitle);
            metadataRepository.save(new ChunkMetadataEntity(
                    chunkId, bookId, sectionId != null ? sectionId : "",
                    result.facet(), result.entities(), result.summary(),
                    ChunkEnrichmentService.MODEL_VERSION, Instant.now()));
            enriched++;
        } catch (Exception ex) {
            failed++;
            log.warn("Backfill failed for chunk {} of book {}: {}", chunk.getId(), bookId, ex.getMessage());
        }
        progressByBook.put(bookId, new BackfillProgress("RUNNING", total, enriched, null));
    }

    progressByBook.put(bookId, new BackfillProgress("COMPLETED", total, enriched, null));
    log.info("Backfill finished for book {} — {}/{} enriched, {} failed", bookId, enriched, total, failed);
}
/**
 * Lists the TEXT chunks of a book that have no {@code chunk_metadata} row yet.
 *
 * <p>Each row of {@code vector_store} is turned into a {@link Document} carrying the
 * row id, its content (empty string when the column is NULL) and the parsed metadata.
 *
 * @param bookId book whose chunks are listed; compared as text against the
 *               {@code book_id} metadata entry
 * @return the chunks still to enrich
 */
private List<Document> listUnenrichedChunks(UUID bookId) {
// Left anti-join against chunk_metadata so re-runs are cheap.
// NOTE(review): the join compares cm.chunk_id (UUID) with vs.id — assumes vector_store.id is a UUID column; confirm.
String sql = """
SELECT vs.id, vs.content, vs.metadata::text AS metadata_text
FROM vector_store vs
LEFT JOIN chunk_metadata cm ON cm.chunk_id = vs.id
WHERE vs.metadata->>'book_id' = ?
AND vs.metadata->>'type' = 'TEXT'
AND cm.chunk_id IS NULL
""";
return jdbcTemplate.query(sql, (rs, rowNum) -> {
String id = rs.getString("id");
String content = rs.getString("content");
String metaJson = rs.getString("metadata_text");
Map<String, Object> meta = parseMetadata(metaJson);
return new Document(id, content != null ? content : "", meta);
}, bookId.toString());
}
/**
 * Parses the metadata JSON of a {@code vector_store} row into a flat map.
 *
 * <p>Text, int, long and boolean values keep their Java type; any other value is
 * stored as its JSON string form. Null, blank or unparseable input yields an empty
 * immutable map (a parse failure is logged).
 *
 * @param json metadata JSON text; may be {@code null}
 * @return the top-level properties as a map
 */
private Map<String, Object> parseMetadata(String json) {
    if (json == null || json.isBlank()) {
        return Map.of();
    }

    JsonNode root;
    try {
        root = objectMapper.readTree(json);
    } catch (JsonProcessingException ex) {
        log.warn("Failed to parse vector_store metadata JSON: {}", ex.getMessage());
        return Map.of();
    }

    Map<String, Object> parsed = new HashMap<>();
    for (Map.Entry<String, JsonNode> property : root.properties()) {
        JsonNode value = property.getValue();
        Object converted;
        if (value.isTextual()) {
            converted = value.asText();
        } else if (value.isInt()) {
            converted = value.asInt();
        } else if (value.isLong()) {
            converted = value.asLong();
        } else if (value.isBoolean()) {
            converted = value.asBoolean();
        } else {
            converted = value.toString();
        }
        parsed.put(property.getKey(), converted);
    }
    return parsed;
}
/**
 * Counts the {@code chunk_metadata} rows stored for a book.
 *
 * @param bookId book to count for
 * @return the row count narrowed to {@code int}; the Optional is always present
 */
public Optional<Integer> countEnrichedChunks(UUID bookId) {
    long storedRows = metadataRepository.countByBookId(bookId);
    int narrowed = (int) storedRows;
    return Optional.of(narrowed);
}
/**
 * Counts the TEXT rows of a book in {@code vector_store}.
 *
 * @param bookId book to count for
 * @return the number of TEXT chunks, or 0 when the query yields no value
 */
public int countTotalTextChunks(UUID bookId) {
    String countSql =
            "SELECT COUNT(*) FROM vector_store WHERE metadata->>'book_id' = ? AND metadata->>'type' = 'TEXT'";
    Integer counted = jdbcTemplate.queryForObject(countSql, Integer.class, bookId.toString());
    if (counted == null) {
        return 0;
    }
    return counted;
}
/**
 * Snapshot of a backfill run.
 *
 * @param status         state label of the run (this file uses IDLE, RUNNING, COMPLETED)
 * @param chunksTotal    number of chunks the run covers
 * @param chunksEnriched number of chunks counted as done so far
 * @param errorMessage   error detail, or {@code null}
 */
public record BackfillProgress(
        String status,
        int chunksTotal,
        int chunksEnriched,
        String errorMessage) {

    /**
     * @return a fresh snapshot with status {@code IDLE}, zero counters and no error
     */
    public static BackfillProgress idle() {
        BackfillProgress nothingRecorded = new BackfillProgress("IDLE", 0, 0, null);
        return nothingRecorded;
    }
}
}
@@ -0,0 +1,50 @@
package com.aiteacher.enrichment;
import com.aiteacher.book.Book;
import com.aiteacher.book.BookRepository;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.NoSuchElementException;
import java.util.UUID;
/**
 * Admin endpoints that start a chunk-enrichment backfill for a book and report its
 * progress.
 */
@RestController
@RequestMapping("/api/v1/admin/books/{id}/enrich")
public class EnrichmentController {

    private final BookRepository bookRepository;
    private final EnrichmentBackfillService backfillService;

    public EnrichmentController(BookRepository bookRepository,
                                EnrichmentBackfillService backfillService) {
        this.bookRepository = bookRepository;
        this.backfillService = backfillService;
    }

    /**
     * Starts a backfill for the book and answers 202 with a RUNNING snapshot built
     * from the database counts.
     *
     * <p>When a backfill for the same book is already reported as RUNNING, no second
     * job is launched and the live progress is returned instead, so repeated requests
     * do not pay for the same LLM calls twice. The guard is best-effort: the RUNNING
     * state is published by the background job itself, so two requests arriving before
     * that happens can still both start one.
     *
     * @param id book id
     * @return 202 with the current progress
     * @throws NoSuchElementException when the book does not exist
     */
    @PostMapping
    public ResponseEntity<EnrichmentBackfillService.BackfillProgress> start(@PathVariable UUID id) {
        Book book = requireBook(id);

        EnrichmentBackfillService.BackfillProgress current = backfillService.getProgress(id);
        if ("RUNNING".equals(current.status())) {
            return ResponseEntity.status(HttpStatus.ACCEPTED).body(current);
        }

        backfillService.backfillBook(id, book.getTitle());
        int total = backfillService.countTotalTextChunks(id);
        int enriched = backfillService.countEnrichedChunks(id).orElse(0);
        return ResponseEntity.status(HttpStatus.ACCEPTED)
                .body(new EnrichmentBackfillService.BackfillProgress("RUNNING", total, enriched, null));
    }

    /**
     * Reports the backfill progress of a book.
     *
     * <p>When no in-memory progress is recorded (status IDLE), the answer is derived
     * from the database counts: COMPLETED when the book has chunks and all of them
     * are enriched, IDLE otherwise.
     *
     * @param id book id
     * @return 200 with the progress snapshot
     * @throws NoSuchElementException when the book does not exist
     */
    @GetMapping
    public ResponseEntity<EnrichmentBackfillService.BackfillProgress> status(@PathVariable UUID id) {
        requireBook(id);
        EnrichmentBackfillService.BackfillProgress progress = backfillService.getProgress(id);
        if ("IDLE".equals(progress.status())) {
            int total = backfillService.countTotalTextChunks(id);
            int enriched = backfillService.countEnrichedChunks(id).orElse(0);
            progress = new EnrichmentBackfillService.BackfillProgress(
                    enriched >= total && total > 0 ? "COMPLETED" : "IDLE",
                    total, enriched, null);
        }
        return ResponseEntity.ok(progress);
    }

    /** Loads the book or fails with the same "not found" error both endpoints use. */
    private Book requireBook(UUID id) {
        return bookRepository.findById(id)
                .orElseThrow(() -> new NoSuchElementException("Book not found."));
    }
}
@@ -26,11 +26,13 @@ public class TopicController {
}
@PostMapping("/{id}/summary")
public ResponseEntity<TopicSummaryResponse> generateSummary(@PathVariable String id) {
public ResponseEntity<TopicSummaryResponse> generateSummary(
@PathVariable String id,
@RequestParam(defaultValue = "en") String language) {
Topic topic = topicRepository.findById(id)
.orElseThrow(() -> new NoSuchElementException("Topic not found."));
TopicSummaryResponse response = topicSummaryService.generateSummary(topic);
TopicSummaryResponse response = topicSummaryService.generateSummary(topic, language);
return ResponseEntity.ok(response);
}
@@ -19,9 +19,11 @@ public record TopicSummaryResponse(
String bookId,
String bookTitle,
Integer page,
String chunkText,
String figureId,
String label,
String caption,
String figureType,
String imageUrl
) {
}
@@ -27,9 +27,9 @@ public class TopicSummaryService {
private static final Logger log = LoggerFactory.getLogger(TopicSummaryService.class);
private static final String SYSTEM_PROMPT = """
You are an expert neurosurgery educator. Your role is to provide accurate,
clinically relevant summaries based ONLY on the content retrieved from the
uploaded medical textbooks. Do not use any knowledge outside the provided context.
You are an expert neurosurgery educator. Your role is to provide accurate, detailed but synthetically concise educational reports on neurosurgery topics, based on the content retrieved from the uploaded medical textbooks. Your audience is highly experienced neurosurgeons, who are looking for a comprehensive yet digestible overview of a specific topic.
When generating reports, your primary goal is to distill the most important and clinically relevant information about the topic. This includes key concepts, anatomical details, surgical techniques, clinical considerations, and any other information that would be essential for a neurosurgeon to understand the topic thoroughly.
Base your reports on uploaded medical textbooks. Do not use any knowledge outside the provided context.
When answering:
- Structure your response clearly with key points
@@ -59,7 +59,7 @@ public class TopicSummaryService {
this.objectMapper = objectMapper;
}
public TopicSummaryResponse generateSummary(Topic topic) {
public TopicSummaryResponse generateSummary(Topic topic, String language) {
List<Book> readyBooks = bookRepository.findAll().stream()
.filter(b -> b.getStatus() == BookStatus.READY)
.toList();
@@ -79,10 +79,10 @@ public class TopicSummaryService {
allFigures.addAll(result.figures());
}
log.debug("Topic summary for '{}': {} sections, {} figures retrieved",
log.debug("Topic reports for '{}': {} sections, {} figures retrieved",
topic.getName(), allSections.size(), allFigures.size());
String contextPrompt = buildContextPrompt(question, allSections, allFigures);
String contextPrompt = buildContextPrompt(question, allSections, allFigures, language);
String summary = chatClient.prompt()
.system(SYSTEM_PROMPT)
.user(contextPrompt)
@@ -134,16 +134,16 @@ public class TopicSummaryService {
private String buildQuestion(Topic topic) {
return String.format(
"Provide a comprehensive educational summary of the following neurosurgery topic: " +
"%s. Topic description: %s. " +
"Include key concepts, diagrams, illustations and clinical considerations, and important details that a neurosurgeon should know.",
"Provide a comprehensive educational report of the following neurosurgery topic: " +
"%s. Topic description: %s. ",
topic.getName(), topic.getDescription()
);
}
private String buildContextPrompt(String question,
List<SectionEntity> sections,
List<FigureEntity> figures) {
List<FigureEntity> figures,
String language) {
StringBuilder sb = new StringBuilder();
if (!sections.isEmpty()) {
@@ -170,6 +170,17 @@ public class TopicSummaryService {
}
sb.append("QUESTION:\n").append(question);
if ("th".equalsIgnoreCase(language)) {
sb.append("\n\nIMPORTANT: Write the narrative in Thai. ")
.append("Keep all medical, anatomical, surgical, pharmacological, and clinical ")
.append("terminology in English (e.g., cerebellopontine angle, glioblastoma, craniotomy, ")
.append("dexamethasone). Do NOT translate disease names, anatomical structures, drug names, ")
.append("procedures, eponyms, or imaging modalities. Translate only connective prose, ")
.append("explanations, and general descriptions. Citation labels [S#]/[F#] stay unchanged. ")
.append("The sentinel string for insufficient context must remain exactly: ")
.append("\"The uploaded books do not contain sufficient information on this topic.\"");
}
return sb.toString();
}
@@ -188,7 +199,7 @@ public class TopicSummaryService {
String bookId = book != null ? book.getId().toString() : null;
sources.add(new TopicSummaryResponse.SourceReference(
"TEXT", "S" + (i + 1), bookId, title, s.getPageStart(),
null, null, null, null));
truncate(s.getFullText(), 500), null, null, null, null, null));
}
for (int i = 0; i < figures.size(); i++) {
@@ -203,7 +214,8 @@ public class TopicSummaryService {
String imageUrl = "/api/v1/figures/" + f.getBookId() + "/" + filename;
sources.add(new TopicSummaryResponse.SourceReference(
"FIGURE", "F" + (i + 1), bookId, title, f.getPage(),
f.getId(), f.getLabel(), f.getCaption(), imageUrl));
null, f.getId(), f.getLabel(), f.getCaption(),
f.getFigureType().name(), imageUrl));
}
return sources;
@@ -218,6 +230,11 @@ public class TopicSummaryService {
}
}
/**
 * Shortens text to at most {@code maxChars} characters for display as a source excerpt.
 *
 * <p>Text that fits is returned unchanged. Longer text is cut and an ellipsis is
 * appended so readers can tell the excerpt is incomplete. The cut is moved back by
 * one character when it would otherwise separate a surrogate pair.
 *
 * @param text     text to shorten; {@code null} yields the empty string
 * @param maxChars maximum number of characters kept from {@code text}
 * @return the original text, or its shortened form followed by an ellipsis
 */
private String truncate(String text, int maxChars) {
    if (text == null) return "";
    if (text.length() <= maxChars) return text;
    int end = maxChars;
    // Never leave a dangling high surrogate at the end of the excerpt.
    if (end > 0 && Character.isHighSurrogate(text.charAt(end - 1))) {
        end--;
    }
    return text.substring(0, end) + "…";
}
private List<TopicSummaryResponse.SourceReference> deserializeSources(String json) {
try {
return objectMapper.readValue(json,
+3 -2
View File
@@ -7,7 +7,7 @@ spring:
jpa:
hibernate:
ddl-auto: update
ddl-auto: none
show-sql: false
properties:
hibernate:
@@ -30,7 +30,8 @@ spring:
api-key: ${OPENAI_API_KEY:}
chat:
options:
model: gpt-4o-mini
model: o4-mini
reasoning-effort: high
embedding:
options:
model: "text-embedding-3-small"
@@ -0,0 +1,14 @@
-- One row of enrichment metadata per text chunk, keyed by the chunk's id.
CREATE TABLE chunk_metadata (
chunk_id UUID PRIMARY KEY,
book_id UUID NOT NULL,
section_id VARCHAR(200) NOT NULL,
-- Facet name; the allowed values are restricted by a CHECK constraint added in a later migration.
facet VARCHAR(32) NOT NULL,
-- JSON array of entity strings, queried with the @> containment operator.
entities JSONB NOT NULL,
summary TEXT NOT NULL,
-- Version tag of the enrichment that produced the row.
model_version VARCHAR(32) NOT NULL,
enriched_at TIMESTAMPTZ NOT NULL
);
-- Per-book lookups and counts.
CREATE INDEX idx_chunk_metadata_book ON chunk_metadata(book_id);
-- Per-book, per-facet lookups.
CREATE INDEX idx_chunk_metadata_book_facet ON chunk_metadata(book_id, facet);
-- GIN index backing the entities @> ... containment queries.
CREATE INDEX idx_chunk_metadata_entities_gin ON chunk_metadata USING GIN (entities jsonb_path_ops);
@@ -0,0 +1,11 @@
-- Generated concept reports; each topic's reports are numbered by report_number.
CREATE TABLE concept_report (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
topic_id VARCHAR(100) NOT NULL,
report_number INT NOT NULL,
-- Serialized facet sections and source references, stored as text.
facets_json TEXT NOT NULL,
sources_json TEXT NOT NULL,
generated_at TIMESTAMPTZ NOT NULL,
UNIQUE (topic_id, report_number)
);
-- NOTE(review): the UNIQUE constraint above should already be backed by an index on
-- (topic_id, report_number), so this index looks redundant — confirm, and if so drop
-- it in a follow-up migration rather than editing this one.
CREATE INDEX idx_concept_report_topic ON concept_report(topic_id, report_number);
@@ -0,0 +1,19 @@
-- Re-creates the facet CHECK constraint so it lists all thirteen facet names.
-- Dropping with IF EXISTS first makes the statement safe whether or not an earlier
-- version of the constraint is present.
ALTER TABLE chunk_metadata DROP CONSTRAINT IF EXISTS chunk_metadata_facet_check;
ALTER TABLE chunk_metadata
ADD CONSTRAINT chunk_metadata_facet_check
CHECK (facet IN (
'DEFINITION',
'ANATOMY',
'PATHOPHYSIOLOGY',
'EPIDEMIOLOGY',
'CLINICAL_PRESENTATION',
'IMAGING',
'CLASSIFICATION',
'INDICATIONS',
'SURGICAL_TECHNIQUE',
'NONSURGICAL_MANAGEMENT',
'COMPLICATIONS',
'OUTCOMES_FOLLOWUP',
'OTHER'
));
+172
View File
@@ -0,0 +1,172 @@
# Concept Retrieval via Indexing-Time Chunk Enrichment
## Context
Vector similarity alone can't answer "tell me everything about aneurysms." It surfaces the chunks most *linguistically* similar to the query, not the set of all chunks that *concern* the concept — and it has no notion of whether each chunk is a definition, a case, a technique, or a complication.
The unlock is to move intelligence from query time to indexing time: for every text chunk, use an LLM to extract **structured metadata** (entities, facet, summary). At retrieval time, concept lookup becomes an SQL filter (`entities @> ['aneurysm']`) bucketed by facet — deterministic, exhaustive, and organized by default. Vector search remains as a fallback for typos / synonyms and for ranking within a facet.
This plan covers: (1) defining the metadata schema, (2) enriching chunks during new book ingestion, (3) back-filling the already-embedded corpus via an admin endpoint, (4) a new concept retrieval path, and (5) a Topics-page UI to surface the result.
## Approach
### 1. Data model — new `chunk_metadata` table
Flyway migration `backend/src/main/resources/db/migration/V7__chunk_metadata.sql`:
```sql
CREATE TABLE chunk_metadata (
chunk_id UUID PRIMARY KEY, -- same UUID that TextChunkingService issues and stores in vectorstore
book_id UUID NOT NULL,
section_id VARCHAR(200) NOT NULL,
facet VARCHAR(32) NOT NULL, -- enum (see ConceptFacet)
entities JSONB NOT NULL, -- canonical lowercase string[]
summary TEXT NOT NULL,
model_version VARCHAR(32) NOT NULL, -- records which LLM/prompt version tagged this chunk
enriched_at TIMESTAMPTZ NOT NULL
);
CREATE INDEX idx_chunk_metadata_book ON chunk_metadata(book_id);
CREATE INDEX idx_chunk_metadata_book_facet ON chunk_metadata(book_id, facet);
CREATE INDEX idx_chunk_metadata_entities_gin ON chunk_metadata USING GIN (entities jsonb_path_ops);
```
Why `chunk_id` is the natural key: `TextChunkingService` already generates a UUID per chunk, uses it as the pgvector Document id, stores it in metadata, and it's the key in `ChunkFigureRefEntity` — so the table joins cleanly to everything already in place.
### 2. Enrichment service & facet taxonomy
New package `com.aiteacher.enrichment`:
- `ConceptFacet` enum — 13 values tailored to neurosurgery textbooks: `DEFINITION, ANATOMY, PATHOPHYSIOLOGY, EPIDEMIOLOGY, CLINICAL_PRESENTATION, IMAGING, CLASSIFICATION, INDICATIONS, SURGICAL_TECHNIQUE, NONSURGICAL_MANAGEMENT, COMPLICATIONS, OUTCOMES_FOLLOWUP, OTHER`. `OTHER` is mandatory so the LLM always has an out (no hallucinated bucketing). The prompt carries explicit disambiguation rules (named grading scales → `CLASSIFICATION`; imaging of a complication → `COMPLICATIONS`; tools inside an operation → `SURGICAL_TECHNIQUE`).
- `ChunkEnrichmentResult` — record `(List<String> entities, ConceptFacet facet, String summary)`
- `ChunkEnrichmentService` — single method `enrich(String chunkText, SectionEntity section, String bookTitle) → ChunkEnrichmentResult`. Uses Spring AI `ChatClient.prompt().call().entity(Class)` for structured output. The prompt gives: book title, section title, chunk text, the fixed facet enum list, and instructs the model to return JSON with entities normalised to lowercase singular canonical form (e.g. "aneurysms" → "aneurysm"; "SAH" → "subarachnoid hemorrhage"). Caps entities at ~8 per chunk.
- `ChunkMetadataEntity` + `ChunkMetadataRepository` — JPA entity/repo mirroring the table.
Model version string (e.g. `"v1"`) lives on the service and is stamped into each row so a future prompt rev can be rolled out by filtering `model_version <> 'v2'` in the backfill job.
### 3. Hook into new book ingestion
Modify `BookEmbeddingService.embedBook`:
```java
// Step 3: Chunk and embed text
List<Document> allChunks = new ArrayList<>();
for (SectionEntity section : sections) {
allChunks.addAll(textChunkingService.chunk(section, bookTitle));
}
if (skipEmbedding) { ... } else {
embedInBatches(allChunks, bookId);
chunkEnrichmentPipeline.enrichAndPersist(allChunks, sectionsById, bookTitle); // NEW
}
```
- `ChunkEnrichmentPipeline` — new orchestrator that iterates chunks, calls `ChunkEnrichmentService.enrich(...)` per chunk, saves `ChunkMetadataEntity` rows in batches, with the same throttle pattern as `embedInBatches`.
- Runs *after* embedding, not in place of it, so a failure in enrichment doesn't corrupt the vector store. On failure, log and continue — the backfill endpoint is the universal recovery path.
- Extend `deleteBookChunks` to also delete `chunk_metadata` rows so deletion stays consistent.
### 4. Backfill endpoint for already-embedded books
New `EnrichmentController` in `com.aiteacher.enrichment`:
- `POST /api/v1/admin/books/{id}/enrich` → kicks off async backfill, returns 202 with `{status, chunksTotal, chunksEnriched}`
- `GET /api/v1/admin/books/{id}/enrich` → returns progress
Backfill flow (`EnrichmentBackfillService.backfillBook(UUID bookId, String bookTitle)`):
1. Query the pgvector storage table directly via `JdbcTemplate` for all chunks of the book:
```sql
SELECT id, content, metadata
FROM vector_store
WHERE metadata->>'book_id' = ? AND metadata->>'type' = 'TEXT'
```
2. Left-anti-join against `chunk_metadata` to skip already-enriched chunks → idempotent, resumable.
3. For each missing chunk: look up its `SectionEntity` via `section_id` in metadata, call `ChunkEnrichmentService.enrich`, write a `ChunkMetadataEntity` row.
4. Progress tracked in an in-memory `ConcurrentHashMap<UUID, BackfillProgress>` (POC scope — no cross-restart resumability needed because the left-anti-join makes re-runs free).
5. `@Async` on the backfill method using the same executor as `embedBook`.
### 5. Concept retrieval path
New `com.aiteacher.concept.ConceptRetriever`:
```java
public ConceptRetrievalResult retrieveByConcept(String conceptKeyword, UUID bookId) {
String canonical = canonicalise(conceptKeyword); // lowercase, trim, simple plural strip
// 5a. Primary: SQL entity match, grouped by facet
List<ChunkMetadataEntity> hits = chunkMetadataRepository
.findByBookIdAndEntityContains(bookId, canonical); // WHERE entities @> to_jsonb(?::text)
if (hits.isEmpty()) {
// 5b. Fallback: vector search, then enrich-join + facet-group
List<Document> vectorHits = vectorStore.similaritySearch(/* TEXT filter, book_id filter, topK=30 */);
List<String> chunkIds = vectorHits.stream().map(Document::getId).toList();
hits = chunkMetadataRepository.findByChunkIdIn(chunkIds);
}
Map<ConceptFacet, List<ChunkMetadataEntity>> byFacet = hits.stream()
.collect(groupingBy(ChunkMetadataEntity::getFacet, LinkedHashMap::new, toList()));
// Hydrate: load SectionEntity for each chunk's section_id; load linked figures
// via ChunkFigureRefRepository.findByChunkIdIn(chunkIds) — reuses existing linkage.
return assemble(byFacet, ...);
}
```
`ConceptRetrievalResult` = `Map<ConceptFacet, FacetBundle>` where each `FacetBundle` holds the parent sections, linked figures, and the per-chunk `summary` strings.
Cross-book aggregation: caller loops over READY books and merges bundles by facet.
### 6. Concept Report service & controller
New `ConceptReportService` in `com.aiteacher.concept` — mirrors the shape of `TopicSummaryService`, but:
- Calls `ConceptRetriever.retrieveByConcept(topic.getName(), bookId)` per book.
- For each facet that has hits, sends **one** LLM synthesis call with the chunks/figures of that facet — producing a structured, facet-labelled report.
- Persists in a new `concept_report` table:
```sql
CREATE TABLE concept_report (
id UUID PRIMARY KEY,
topic_id VARCHAR(255) NOT NULL REFERENCES topic(id),
report_number INT NOT NULL,
facets_json JSONB NOT NULL, -- [{facetKey,title,markdown,refLabels[]}, ...]
sources_json JSONB NOT NULL, -- deduplicated SourceReference[]
generated_at TIMESTAMPTZ NOT NULL,
UNIQUE (topic_id, report_number)
);
```
Controller `ConceptReportController` exposes three endpoints under `/api/v1/topics/{id}/concept-reports` (POST generate, GET list, GET `/{reportId}`).
Reuses `TopicSummaryResponse.SourceReference` verbatim.
### 7. Frontend
- `frontend/src/stores/topicStore.ts`: add parallel state `conceptReportList`, `activeConceptReport`, `conceptReportLoading`, and actions mirroring the existing summary ones.
- `frontend/src/views/TopicsView.vue`: add a **Summary / Concept Report** tab toggle at the top of the topic panel. Concept Report reuses the history-chips + Generate button UI. Report body renders each `FacetSection` as `<h3>{title}</h3>` + markdown.
- Loading hint: update the "up to 30 seconds" copy to "up to 60 seconds".
### 8. README update
Add an **Indexing Pipeline** diagram showing: PDF → parse → chunk → embed → **enrich (new)** → chunk_metadata. Plus a **Concept Retrieval** sequence diagram: query → entity-match SQL → facet-grouped bundle → synthesis → report.
## Decisions & trade-offs
- **Storage as separate Postgres table, not vectorstore JSON**: vectorstore has no metadata-only update API, backfill would require delete+reinsert (re-embedding cost). A dedicated table joins cleanly on `chunk_id` and is GIN-indexed.
- **Entity-match primary, vector fallback**: deterministic for the main use case, robust against typos/synonyms. Vector search stays the default for normal chat retrieval — this feature is additive.
- **Enrichment runs *after* embedding, not before**: keeps the two failure modes independent. The backfill endpoint is the universal recovery lever.
- **Fixed 13-value facet enum** (incl. `OTHER`): constrains LLM outputs; `OTHER` prevents forced mis-bucketing.
- **Direct `JdbcTemplate` read against `vector_store` for backfill**: Spring AI exposes no listing API. Acceptable for a POC, isolated behind one method.
- **Synchronous (sequential) LLM calls**: simplest; parallelism is a later optimisation if needed.
- **`model_version` column**: cheap insurance. If the prompt or facet taxonomy changes, backfill can re-enrich only stale rows.
## Verification
1. Migration applies V7 and V8. Tables and indexes created.
2. New book ingestion: upload PDF → `chunk_metadata` populated with plausible entities/facets/summaries.
3. Backfill: POST `/api/v1/admin/books/{id}/enrich` → idempotent, completes, re-run is a no-op.
4. Concept retrieval primary path: POST `/api/v1/topics/aneurysm/concept-reports` → 200 with facets populated.
5. Fallback path: misspelled topic still returns results via vector fallback.
6. Frontend: Concept Report tab renders facet-labelled markdown + sources + inline figures; persists across reloads.
7. Deletion: removing a book cascades to `chunk_metadata` rows.
8. Regression: existing chat and summary flows still work.
9. Lint & tests pass.
+70 -2
View File
@@ -32,6 +32,13 @@
<span>{{ book.status === 'PENDING' ? 'Queued for processing...' : 'Embedding in progress...' }}</span>
</div>
<div v-if="enrichProgress && enrichProgress.status === 'RUNNING'" class="processing-indicator">
<div class="spinner spinner-dark"></div>
<span>Enriching chunks {{ enrichProgress.chunksEnriched }} / {{ enrichProgress.chunksTotal }}</span>
</div>
<div v-if="enrichFeedback" class="enrich-feedback">{{ enrichFeedback }}</div>
<div class="book-actions">
<router-link
v-if="book.status === 'READY'"
@@ -40,6 +47,15 @@
>
Read
</router-link>
<button
v-if="book.status === 'READY' && uploadEnabled"
class="btn btn-secondary"
:disabled="enrichRunning"
@click="handleEnrich"
title="Enrich chunks with concept metadata"
>
{{ enrichRunning ? 'Enriching...' : 'Enrich' }}
</button>
<button
v-if="deleteEnabled"
class="btn btn-danger"
@@ -54,8 +70,10 @@
</template>
<script setup lang="ts">
import { computed } from 'vue'
import type { Book } from '@/stores/bookStore'
import { computed, onUnmounted, ref } from 'vue'
import type { Book, EnrichmentProgress } from '@/stores/bookStore'
import { useBookStore } from '@/stores/bookStore'
import { env } from '@/env';
const props = defineProps<{
book: Book
@@ -67,6 +85,47 @@ defineEmits<{
(e: 'delete', id: string): void
}>()
const bookStore = useBookStore()
const enrichProgress = ref<EnrichmentProgress | null>(null)
const enrichFeedback = ref<string | null>(null)
let pollTimer: ReturnType<typeof setInterval> | null = null
const enrichRunning = computed(() => enrichProgress.value?.status === 'RUNNING')
const uploadEnabled = env('VITE_UPLOAD_ENABLED') !== 'false'
/**
 * Click handler for the "Enrich" button.
 *
 * Clears any previous feedback, asks the store to start enrichment for this
 * card's book, and begins polling when the store returns a progress object.
 * When the store returns nothing, the store's error (or a generic fallback)
 * is shown as feedback instead.
 */
async function handleEnrich() {
  enrichFeedback.value = null

  const initialProgress = await bookStore.startEnrichment(props.book.id)
  if (initialProgress) {
    enrichProgress.value = initialProgress
    startPolling()
    return
  }

  enrichFeedback.value = bookStore.error ?? 'Enrichment failed to start.'
}
/**
 * Polls the enrichment status of this card's book every two seconds.
 *
 * Any previous poll loop is cancelled first. Polling ends as soon as the
 * reported status is anything other than 'RUNNING': on 'COMPLETED' the chunk
 * counts are shown, otherwise the reported error message (or a generic
 * fallback) is shown so the loop cannot run forever on a job that stopped.
 * A failed status request (null) is treated as transient and simply retried
 * on the next tick.
 */
function startPolling() {
  stopPolling()

  // Skip a tick while the previous request is still in flight, so a slow
  // stale response can never overwrite a newer one.
  let requestPending = false

  const timer = setInterval(async () => {
    if (requestPending) return
    requestPending = true
    try {
      const status = await bookStore.fetchEnrichmentStatus(props.book.id)

      // Polling was stopped (or restarted) while we were waiting: drop the result.
      if (pollTimer !== timer) return
      if (!status) return

      enrichProgress.value = status
      if (status.status === 'RUNNING') return

      stopPolling()
      enrichFeedback.value =
        status.status === 'COMPLETED'
          ? `Enriched ${status.chunksEnriched} / ${status.chunksTotal} chunks.`
          : (status.errorMessage ?? 'Enrichment stopped before completing.')
    } finally {
      requestPending = false
    }
  }, 2000)

  pollTimer = timer
}
/** Cancels the active status poll, if there is one, and forgets its timer handle. */
function stopPolling() {
  if (pollTimer == null) return

  clearInterval(pollTimer)
  pollTimer = null
}
onUnmounted(stopPolling)
const statusClass = computed(() => {
switch (props.book.status) {
case 'READY':
@@ -193,4 +252,13 @@ function formatDate(iso: string): string {
gap: 0.5rem;
margin-top: 0.25rem;
}
.enrich-feedback {
font-size: 0.8rem;
color: #22543d;
background: #f0fff4;
border: 1px solid #c6f6d5;
border-radius: 6px;
padding: 0.4rem 0.6rem;
}
</style>
+12 -95
View File
@@ -8,61 +8,12 @@
<!-- Sources for assistant messages -->
<div v-if="!isUser && message.sources && message.sources.length > 0" class="message-sources">
<div class="sources-label">Sources:</div>
<div class="source-list" ref="sourceListEl">
<!-- TEXT sources -->
<div
v-for="(source, idx) in textSources"
:key="'text-' + idx"
class="source-item"
:class="{ 'source-item--active': activeRef === source.refLabel }"
:data-ref-label="source.refLabel"
>
<div
class="source-chip source-chip--text"
:class="{ 'source-chip--clickable': source.bookId && source.page }"
@click="source.bookId && source.page ? emit('open-source', source.bookId, source.page) : undefined"
>
<span class="source-icon">📖</span>
<span v-if="source.refLabel" class="source-ref-label">{{ source.refLabel }}</span>
<span class="source-book-title">{{ source.bookTitle }}</span>
<span v-if="source.page" class="source-page">p.&nbsp;{{ source.page }}</span>
<span v-if="source.bookId && source.page" class="source-open-hint"></span>
</div>
<div v-if="source.chunkText" class="source-chunk">{{ source.chunkText }}</div>
</div>
<!-- FIGURE sources -->
<div
v-for="(source, idx) in figureSources"
:key="'fig-' + idx"
class="source-item source-item--figure"
:class="{ 'source-item--active': activeRef === source.refLabel }"
:data-ref-label="source.refLabel"
>
<div
class="source-chip source-chip--figure"
:class="{ 'source-chip--clickable': source.bookId && source.page }"
@click="source.bookId && source.page ? emit('open-source', source.bookId, source.page) : undefined"
>
<span class="source-icon">🖼</span>
<span v-if="source.refLabel" class="source-ref-label source-ref-label--figure">{{ source.refLabel }}</span>
<span class="source-figure-label">{{ source.label || 'Figure' }}</span>
<span v-if="source.page" class="source-page">p.&nbsp;{{ source.page }}</span>
<span v-if="source.figureType" class="source-figure-type">{{ formatFigureType(source.figureType) }}</span>
<span v-if="source.bookId && source.page" class="source-open-hint"></span>
</div>
<div v-if="source.caption" class="source-caption">{{ source.caption }}</div>
<div class="source-figure-image">
<img
:src="source.imageUrl"
:alt="source.caption || source.label || 'Figure'"
class="figure-img"
loading="lazy"
@error="onImageError"
/>
</div>
</div>
</div>
<SourceList
ref="sourceListEl"
:sources="message.sources"
:active-ref="activeRef"
@open-source="(bookId: string, page: number) => emit('open-source', bookId, page)"
/>
</div>
<div class="message-timestamp">{{ formatTime(message.createdAt) }}</div>
@@ -74,6 +25,7 @@
import { computed, ref } from 'vue'
import { marked } from 'marked'
import type { ChatMessage, ChatSource } from '@/stores/chatStore'
import SourceList from '@/components/SourceList.vue'
const props = defineProps<{
message: ChatMessage
@@ -85,14 +37,12 @@ const emit = defineEmits<{
const isUser = computed(() => props.message.role === 'USER')
const activeRef = ref<string | null>(null)
const sourceListEl = ref<HTMLElement | null>(null)
const sourceListEl = ref<InstanceType<typeof SourceList> | null>(null)
/**
 * Replaces the characters `&`, `<`, `>` and `"` with their HTML entities so
 * the string can be embedded in markup or a double-quoted attribute.
 */
function escapeHtml(s: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;'
  }
  return s.replace(/[&<>"]/g, (ch) => entities[ch])
}
/** Replaces [S1]/[F1]-style labels in the rendered HTML with clickable badges.
* For figure citations, also injects an inline illustration below the badge. */
const renderedWithBadges = computed(() => {
const html = marked.parse(props.message.content) as string
@@ -104,7 +54,7 @@ const renderedWithBadges = computed(() => {
}
return html.replace(/\[(S|F)\d+\]/g, (match) => {
const inner = match.slice(1, -1) // e.g. "F1"
const inner = match.slice(1, -1)
const badge = `<span class="citation-badge" data-ref="${inner}" title="Jump to source ${inner}">${match}</span>`
const fig = figureMap.get(inner)
@@ -125,53 +75,20 @@ function onContentClick(e: MouseEvent) {
const target = e.target as HTMLElement
if (!target.classList.contains('citation-badge')) return
const label = target.getAttribute('data-ref') // e.g. "S1" or "F1"
const label = target.getAttribute('data-ref')
if (!label) return
activeRef.value = activeRef.value === label ? null : label
// Scroll to the matching source chip
const sourceEl = sourceListEl.value?.querySelector(`[data-ref-label="${label}"]`) as HTMLElement | null
const sourceEl = sourceListEl.value?.$el?.querySelector(`[data-ref-label="${label}"]`) as HTMLElement | null
sourceEl?.scrollIntoView({ behavior: 'smooth', block: 'start' })
// Open the book at the referenced page
const allSources = props.message.sources ?? []
const source = allSources.find((s: ChatSource) => s.refLabel === label)
const source = (props.message.sources ?? []).find((s: ChatSource) => s.refLabel === label)
if (source?.bookId && source.page) {
emit('open-source', source.bookId, source.page)
}
}
// Sources listed as text citations: explicitly typed TEXT, or with no type at all.
const textSources = computed(() => {
  const all = props.message.sources ?? []
  return all.filter((s: ChatSource) => !s.type || s.type === 'TEXT')
})

// Sources listed as figure citations.
const figureSources = computed(() => {
  const all = props.message.sources ?? []
  return all.filter((s: ChatSource) => s.type === 'FIGURE')
})
/**
 * Maps a figure-type code to its display label.
 *
 * Unknown codes are returned unchanged. A `switch` is used rather than a
 * plain-object lookup so that inputs which happen to match inherited object
 * members (e.g. 'constructor', 'toString') are also returned unchanged
 * instead of yielding a non-string value.
 *
 * @param type figure-type code
 * @returns the display label, or `type` itself when no label is defined
 */
function formatFigureType(type: string): string {
  switch (type) {
    case 'ANATOMICAL_DIAGRAM':
      return 'Anatomical Diagram'
    case 'SURGICAL_PHOTOGRAPH':
      return 'Surgical Photo'
    case 'MRI_CT_SCAN':
      return 'MRI / CT'
    case 'TABLE':
      return 'Table'
    case 'CHART':
      return 'Chart'
    case 'INTRAOPERATIVE_IMAGE':
      return 'Intraoperative'
    default:
      return type
  }
}
/**
 * `error` handler for figure images: marks the image as unavailable, hides
 * it, and replaces the parent's content with an "Image unavailable" note.
 */
function onImageError(e: Event) {
  const failedImg = e.target as HTMLImageElement
  failedImg.alt = 'Image unavailable'
  failedImg.style.display = 'none'

  const host = failedImg.parentElement
  if (!host) return

  host.innerHTML = '<span class="figure-missing">Image unavailable</span>'
}
function formatTime(iso: string): string {
return new Date(iso).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })
}
+298
View File
@@ -0,0 +1,298 @@
<template>
<div class="source-list">
<!-- TEXT sources -->
<div
v-for="(source, idx) in textSources"
:key="'text-' + idx"
class="source-item"
:class="{ 'source-item--active': activeRef === source.refLabel }"
:data-ref-label="source.refLabel"
>
<div class="source-chip-wrapper">
<div
class="source-chip source-chip--text"
:class="{ 'source-chip--clickable': source.bookId && source.page }"
@click="source.bookId && source.page ? emit('open-source', source.bookId, source.page) : undefined"
>
<span class="source-icon">📖</span>
<span v-if="source.refLabel" class="source-ref-label">{{ source.refLabel }}</span>
<span class="source-book-title">{{ source.bookTitle }}</span>
<span v-if="source.page" class="source-page">p.&nbsp;{{ source.page }}</span>
<span v-if="source.bookId && source.page" class="source-open-hint"></span>
</div>
<div v-if="source.chunkText" class="tooltip tooltip--text">
<p class="tooltip-chunk">{{ source.chunkText }}</p>
</div>
</div>
</div>
<!-- FIGURE sources -->
<div
v-for="(source, idx) in figureSources"
:key="'fig-' + idx"
class="source-item source-item--figure"
:class="{ 'source-item--active': activeRef === source.refLabel }"
:data-ref-label="source.refLabel"
>
<div class="source-chip-wrapper">
<div
class="source-chip source-chip--figure"
:class="{ 'source-chip--clickable': source.bookId && source.page }"
@click="source.bookId && source.page ? emit('open-source', source.bookId, source.page) : undefined"
>
<span class="source-icon">🖼</span>
<span v-if="source.refLabel" class="source-ref-label source-ref-label--figure">{{ source.refLabel }}</span>
<span class="source-figure-label">{{ source.label || 'Figure' }}</span>
<span v-if="source.page" class="source-page">p.&nbsp;{{ source.page }}</span>
<span v-if="source.figureType" class="source-figure-type">{{ formatFigureType(source.figureType) }}</span>
<span v-if="source.bookId && source.page" class="source-open-hint"></span>
</div>
<div v-if="source.imageUrl || source.caption" class="tooltip tooltip--figure">
<img
v-if="source.imageUrl"
:src="source.imageUrl"
:alt="source.caption || source.label || 'Figure'"
class="tooltip-figure-img"
loading="lazy"
@error="onImageError"
/>
<p v-if="source.caption" class="tooltip-caption">{{ source.caption }}</p>
</div>
</div>
</div>
</div>
</template>
<script setup lang="ts">
import { computed } from 'vue'
/**
 * One citation entry rendered by this component, either a text passage or a
 * figure. Fields that do not apply to an entry's kind are simply left unset.
 */
export interface SourceItem {
// Kind of source; entries without a type are listed together with the TEXT sources.
type?: 'TEXT' | 'FIGURE'
// Citation label shown in the chip; also written to `data-ref-label` and compared with `activeRef` for highlighting.
refLabel?: string
// Book to open on click; the chip is clickable only when both `bookId` and `page` are truthy.
bookId?: string | null
// Book title shown in text chips.
bookTitle: string
// Page shown as "p. N" and passed along with `bookId` in the `open-source` event.
page?: number | null
// Passage text shown in the hover tooltip of a text chip.
chunkText?: string
// Not read by this component's template.
figureId?: string
// Figure label shown in figure chips; 'Figure' is displayed when absent.
label?: string
// Figure caption shown in the tooltip and used as the image alt text.
caption?: string
// Figure-type code, displayed through `formatFigureType`.
figureType?: string
// Image URL used as the `src` of the tooltip image.
imageUrl?: string
}
const props = defineProps<{
sources: SourceItem[]
activeRef?: string | null
}>()
const emit = defineEmits<{
'open-source': [bookId: string, page: number]
}>()
// Sources rendered in the text section: typed TEXT, or carrying no type at all.
const textSources = computed(() => {
  return props.sources.filter((entry) => !entry.type || entry.type === 'TEXT')
})

// Sources rendered in the figure section.
const figureSources = computed(() => {
  return props.sources.filter((entry) => entry.type === 'FIGURE')
})
/**
 * Maps a figure-type code to its display label.
 *
 * Unknown codes are returned unchanged. A `switch` is used rather than a
 * plain-object lookup so that inputs which happen to match inherited object
 * members (e.g. 'constructor', 'toString') are also returned unchanged
 * instead of yielding a non-string value.
 *
 * @param type figure-type code
 * @returns the display label, or `type` itself when no label is defined
 */
function formatFigureType(type: string): string {
  switch (type) {
    case 'ANATOMICAL_DIAGRAM':
      return 'Anatomical Diagram'
    case 'SURGICAL_PHOTOGRAPH':
      return 'Surgical Photo'
    case 'MRI_CT_SCAN':
      return 'MRI / CT'
    case 'TABLE':
      return 'Table'
    case 'CHART':
      return 'Chart'
    case 'INTRAOPERATIVE_IMAGE':
      return 'Intraoperative'
    default:
      return type
  }
}
/**
 * `error` handler for tooltip images: hides the broken image and appends an
 * "Image unavailable" placeholder to its parent.
 *
 * The placeholder is created outside the template, so it does not receive the
 * component's scoped-style attribute automatically. Any `data-v-*` attributes
 * found on the failed image are copied onto it so the scoped `.figure-missing`
 * rule still matches. A repeated error event on the same image does not add a
 * second placeholder.
 */
function onImageError(e: Event) {
  const img = e.target as HTMLImageElement
  img.style.display = 'none'

  const wrapper = img.parentElement
  if (!wrapper) return

  // Already handled once for this wrapper: keep a single placeholder.
  if (wrapper.querySelector('.figure-missing')) return

  const missing = document.createElement('span')
  missing.className = 'figure-missing'
  missing.textContent = 'Image unavailable'

  // Propagate the scoped-CSS marker(s) from the template-rendered image.
  for (const attr of Array.from(img.attributes)) {
    if (attr.name.startsWith('data-v-')) {
      missing.setAttribute(attr.name, attr.value)
    }
  }

  wrapper.appendChild(missing)
}
</script>
<style scoped>
.source-list {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.source-item {
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.source-item--active {
outline: 2px solid #4299e1;
border-radius: 6px;
}
/* Wrapper provides the positioning context for the tooltip */
.source-chip-wrapper {
position: relative;
display: inline-block;
}
/* ── Chip base ── */
.source-chip {
display: inline-flex;
align-items: center;
gap: 0.25rem;
border-radius: 4px;
padding: 0.2rem 0.5rem;
font-size: 0.78rem;
}
.source-chip--text {
background: #ebf8ff;
border: 1px solid #bee3f8;
}
.source-chip--figure {
background: #f0fff4;
border: 1px solid #9ae6b4;
}
.source-chip--clickable {
cursor: pointer;
transition: background 0.15s, border-color 0.15s;
}
.source-chip--clickable:hover {
background: #bee3f8;
border-color: #90cdf4;
}
.source-chip--figure.source-chip--clickable:hover {
background: #c6f6d5;
border-color: #68d391;
}
/* ── Tooltip ── */
.tooltip {
display: none;
position: absolute;
left: 0;
top: calc(100% + 6px);
z-index: 100;
background: #1a202c;
border-radius: 6px;
padding: 0.6rem 0.75rem;
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.2);
/* Keep it from overflowing too far */
max-width: min(340px, 80vw);
pointer-events: none;
}
/* Show on chip hover */
.source-chip-wrapper:hover .tooltip {
display: block;
}
/* Small arrow pointing up */
.tooltip::before {
content: '';
position: absolute;
top: -5px;
left: 14px;
border-left: 5px solid transparent;
border-right: 5px solid transparent;
border-bottom: 5px solid #1a202c;
}
.tooltip--text .tooltip-chunk {
margin: 0;
font-size: 0.78rem;
color: #e2e8f0;
line-height: 1.5;
white-space: pre-wrap;
word-break: break-word;
}
.tooltip--figure {
max-width: min(300px, 80vw);
}
.tooltip-figure-img {
display: block;
max-width: 100%;
max-height: 220px;
border-radius: 4px;
object-fit: contain;
margin-bottom: 0.4rem;
}
.tooltip-caption {
margin: 0;
font-size: 0.75rem;
color: #cbd5e0;
font-style: italic;
line-height: 1.4;
}
/* ── Chip internals ── */
.source-icon {
font-size: 0.8rem;
}
.source-ref-label {
font-size: 0.72rem;
font-weight: 700;
background: #bee3f8;
color: #2b6cb0;
border-radius: 3px;
padding: 0 0.3rem;
}
.source-ref-label--figure {
background: #9ae6b4;
color: #276749;
}
.source-book-title {
color: #2b6cb0;
font-weight: 500;
}
.source-figure-label {
color: #276749;
font-weight: 600;
}
.source-figure-type {
color: #718096;
font-size: 0.72rem;
background: #e2e8f0;
border-radius: 3px;
padding: 0 0.3rem;
}
.source-page {
color: #718096;
}
.source-open-hint {
font-size: 0.75rem;
color: #3182ce;
margin-left: 0.1rem;
}
.figure-missing {
font-size: 0.78rem;
color: #a0aec0;
font-style: italic;
}
</style>
+38 -1
View File
@@ -77,5 +77,42 @@ export const useBookStore = defineStore('books', () => {
}
}
return { books, loading, uploading, error, fetchBooks, uploadBook, refreshBook, deleteBook }
async function startEnrichment(id: string): Promise<EnrichmentProgress | null> {
try {
const response = await api.post<EnrichmentProgress>(`/admin/books/${id}/enrich`)
return response.data
} catch (err: any) {
error.value = err.message
return null
}
}
async function fetchEnrichmentStatus(id: string): Promise<EnrichmentProgress | null> {
try {
const response = await api.get<EnrichmentProgress>(`/admin/books/${id}/enrich`)
return response.data
} catch {
return null
}
}
return {
books,
loading,
uploading,
error,
fetchBooks,
uploadBook,
refreshBook,
deleteBook,
startEnrichment,
fetchEnrichmentStatus
}
})
/**
 * Payload returned by the `/admin/books/{id}/enrich` endpoint, both when
 * starting enrichment (POST) and when reading its progress (GET).
 */
export interface EnrichmentProgress {
// Job state reported by the endpoint.
status: 'IDLE' | 'RUNNING' | 'COMPLETED'
// Total number of chunks, the denominator of the progress display.
chunksTotal: number
// Number of chunks enriched so far.
chunksEnriched: number
// NOTE(review): presumably set when the job failed — confirm which status accompanies it.
errorMessage: string | null
}
+94 -3
View File
@@ -15,9 +15,11 @@ export interface SourceReference {
bookId: string | null
bookTitle: string
page: number | null
chunkText?: string
figureId?: string
label?: string
caption?: string
figureType?: string
imageUrl?: string
}
@@ -37,6 +39,29 @@ export interface SavedSummaryItem {
generatedAt: string
}
/** One facet section of a concept report. */
export interface FacetSection {
// Identifier of the facet.
facetKey: string
// Heading text for the section.
title: string
// Section body as markdown.
markdown: string
// NOTE(review): presumably the citation labels (e.g. "S1", "F2") used by this section — confirm against the backend.
refLabels: string[]
}
/**
 * Full concept report as returned by the
 * `/topics/{topicId}/concept-reports` endpoints.
 */
export interface ConceptReport {
id: string
// Number displayed to the user for this report.
reportNumber: number
topicId: string
topicName: string
// Facet sections of the report.
facets: FacetSection[]
// Sources cited by the report.
sources: SourceReference[]
// Generation timestamp; NOTE(review): presumably an ISO-8601 string — confirm.
generatedAt: string
}
/**
 * Lightweight list entry for a saved concept report, as returned by
 * GET `/topics/{topicId}/concept-reports`.
 */
export interface SavedConceptReportItem {
// Report id, used to fetch the full report.
id: string
// Number displayed to the user for this report.
reportNumber: number
// Generation timestamp; NOTE(review): presumably an ISO-8601 string — confirm.
generatedAt: string
}
export const useTopicStore = defineStore('topics', () => {
const topics = ref<Topic[]>([])
const activeSummary = ref<TopicSummary | null>(null)
@@ -47,6 +72,11 @@ export const useTopicStore = defineStore('topics', () => {
const summaryListLoading = ref(false)
const error = ref<string | null>(null)
const activeConceptReport = ref<ConceptReport | null>(null)
const conceptReportList = ref<SavedConceptReportItem[]>([])
const conceptReportLoading = ref(false)
const conceptReportListLoading = ref(false)
async function fetchTopics() {
loading.value = true
error.value = null
@@ -90,13 +120,17 @@ export const useTopicStore = defineStore('topics', () => {
}
}
async function generateSummary(topicId: string): Promise<TopicSummary | null> {
async function generateSummary(topicId: string, language: 'en' | 'th' = 'en'): Promise<TopicSummary | null> {
summaryLoading.value = true
activeSummaryTopicId.value = topicId
activeSummary.value = null
error.value = null
try {
const response = await api.post<TopicSummary>(`/topics/${topicId}/summary`)
const response = await api.post<TopicSummary>(
`/topics/${topicId}/summary`,
null,
{ params: { language } }
)
activeSummary.value = response.data
return response.data
} catch (err: any) {
@@ -108,6 +142,56 @@ export const useTopicStore = defineStore('topics', () => {
}
}
async function fetchConceptReports(topicId: string) {
conceptReportListLoading.value = true
conceptReportList.value = []
error.value = null
try {
const response = await api.get<SavedConceptReportItem[]>(`/topics/${topicId}/concept-reports`)
conceptReportList.value = response.data
} catch (err: any) {
error.value = err.message
} finally {
conceptReportListLoading.value = false
}
}
async function fetchConceptReportDetail(topicId: string, reportId: string): Promise<ConceptReport | null> {
conceptReportLoading.value = true
activeConceptReport.value = null
error.value = null
try {
const response = await api.get<ConceptReport>(`/topics/${topicId}/concept-reports/${reportId}`)
activeConceptReport.value = response.data
return response.data
} catch (err: any) {
error.value = err.message
return null
} finally {
conceptReportLoading.value = false
}
}
async function generateConceptReport(topicId: string, language: 'en' | 'th' = 'en'): Promise<ConceptReport | null> {
conceptReportLoading.value = true
activeConceptReport.value = null
error.value = null
try {
const response = await api.post<ConceptReport>(
`/topics/${topicId}/concept-reports`,
null,
{ params: { language } }
)
activeConceptReport.value = response.data
return response.data
} catch (err: any) {
error.value = err.message
return null
} finally {
conceptReportLoading.value = false
}
}
return {
topics,
activeSummary,
@@ -117,9 +201,16 @@ export const useTopicStore = defineStore('topics', () => {
summaryLoading,
summaryListLoading,
error,
activeConceptReport,
conceptReportList,
conceptReportLoading,
conceptReportListLoading,
fetchTopics,
fetchSummaries,
fetchSummaryDetail,
generateSummary
generateSummary,
fetchConceptReports,
fetchConceptReportDetail,
generateConceptReport
}
})
+351 -101
View File
@@ -20,14 +20,46 @@
<div v-else class="topics-layout">
<div class="topics-main">
<!-- Mode toggle: Summary vs Concept Report -->
<div v-if="selectedTopicId" class="mode-toggle">
<button
class="mode-tab"
:class="{ 'mode-tab--active': mode === 'summary' }"
@click="setMode('summary')"
>Summary</button>
<button
class="mode-tab"
:class="{ 'mode-tab--active': mode === 'concept' }"
@click="setMode('concept')"
>Concept Report</button>
</div>
<!-- Summary history list -->
<div v-if="selectedTopicId" class="history-panel card">
<div v-if="selectedTopicId && mode === 'summary'" class="history-panel card">
<div class="history-header">
<span class="history-title">Saved summaries</span>
<button class="btn btn-primary btn-sm" :disabled="topicStore.summaryLoading" @click="handleGenerate(selectedTopicId!)">
<span v-if="topicStore.summaryLoading" class="spinner" style="width:14px;height:14px;display:inline-block;vertical-align:middle;margin-right:4px;"></span>
Generate New
</button>
<div class="history-actions">
<div class="lang-toggle" role="group" aria-label="Summary language">
<button
type="button"
class="lang-toggle-btn"
:class="{ 'lang-toggle-btn--active': summaryLanguage === 'en' }"
:disabled="topicStore.summaryLoading"
@click="summaryLanguage = 'en'"
>EN</button>
<button
type="button"
class="lang-toggle-btn"
:class="{ 'lang-toggle-btn--active': summaryLanguage === 'th' }"
:disabled="topicStore.summaryLoading"
@click="summaryLanguage = 'th'"
>TH</button>
</div>
<button class="btn btn-primary btn-sm" :disabled="topicStore.summaryLoading" @click="handleGenerate(selectedTopicId!)">
<span v-if="topicStore.summaryLoading" class="spinner" style="width:14px;height:14px;display:inline-block;vertical-align:middle;margin-right:4px;"></span>
Generate New
</button>
</div>
</div>
<div v-if="topicStore.summaryListLoading" class="history-loading">
@@ -53,8 +85,59 @@
</div>
</div>
<!-- Concept report history list -->
<div v-if="selectedTopicId && mode === 'concept'" class="history-panel card">
<div class="history-header">
<span class="history-title">Saved concept reports</span>
<div class="history-actions">
<div class="lang-toggle" role="group" aria-label="Report language">
<button
type="button"
class="lang-toggle-btn"
:class="{ 'lang-toggle-btn--active': conceptLanguage === 'en' }"
:disabled="topicStore.conceptReportLoading"
@click="conceptLanguage = 'en'"
>EN</button>
<button
type="button"
class="lang-toggle-btn"
:class="{ 'lang-toggle-btn--active': conceptLanguage === 'th' }"
:disabled="topicStore.conceptReportLoading"
@click="conceptLanguage = 'th'"
>TH</button>
</div>
<button class="btn btn-primary btn-sm" :disabled="topicStore.conceptReportLoading" @click="handleGenerateConcept(selectedTopicId!)">
<span v-if="topicStore.conceptReportLoading" class="spinner" style="width:14px;height:14px;display:inline-block;vertical-align:middle;margin-right:4px;"></span>
Generate New
</button>
</div>
</div>
<div v-if="topicStore.conceptReportListLoading" class="history-loading">
<div class="spinner spinner-dark" style="width:20px;height:20px;margin-right:8px;display:inline-block;vertical-align:middle;"></div>
Loading...
</div>
<div v-else-if="topicStore.conceptReportList.length === 0" class="history-empty">
No concept reports yet. Click "Generate New" to create one.
</div>
<div v-else class="history-list">
<button
v-for="item in topicStore.conceptReportList"
:key="item.id"
class="history-chip"
:class="{ 'history-chip--active': topicStore.activeConceptReport?.id === item.id }"
@click="handleLoadConceptReport(item)"
>
Report #{{ item.reportNumber }}
<span class="history-chip-date">· {{ formatDateShort(item.generatedAt) }}</span>
</button>
</div>
</div>
<!-- Summary Panel -->
<div v-if="topicStore.summaryLoading" class="summary-panel card">
<div v-if="mode === 'summary' && topicStore.summaryLoading" class="summary-panel card">
<div class="summary-loading">
<div class="spinner spinner-dark" style="width:36px;height:36px;margin:0 auto 1rem;"></div>
<p class="summary-loading-text">Generating summary from uploaded books...</p>
@@ -62,7 +145,7 @@
</div>
</div>
<div v-else-if="summaryError" class="summary-panel card summary-error">
<div v-else-if="mode === 'summary' && summaryError" class="summary-panel card summary-error">
<h2 class="summary-topic-name">Summary Error</h2>
<p class="error-text">{{ summaryError }}</p>
<p v-if="isNoBooks" class="no-books-hint">
@@ -72,13 +155,13 @@
</p>
</div>
<div v-else-if="!topicStore.activeSummary" class="summary-panel card summary-placeholder">
<div v-else-if="mode === 'summary' && !topicStore.activeSummary" class="summary-panel card summary-placeholder">
<p class="summary-placeholder-text">
{{ selectedTopicId ? 'Select a saved summary or generate a new one.' : 'Select a topic to get started.' }}
</p>
</div>
<div v-else class="summary-panel card">
<div v-else-if="mode === 'summary'" class="summary-panel card">
<div class="summary-header">
<h2 class="summary-topic-name">{{ topicStore.activeSummary.topicName }}</h2>
<div class="summary-meta">
@@ -96,57 +179,11 @@
Sources ({{ topicStore.activeSummary.sources.length }})
<span>{{ showSources ? '▲' : '▼' }}</span>
</button>
<div v-if="showSources" class="sources-list">
<!-- TEXT sources -->
<div
v-for="(source, idx) in textSources"
:key="'text-' + idx"
class="source-item"
:data-ref-label="source.refLabel"
>
<div
class="source-chip source-chip--text"
:class="{ 'source-chip--clickable': source.bookId && source.page }"
@click="source.bookId && source.page ? handleOpenSource(source.bookId, source.page) : undefined"
>
<span class="source-icon">📖</span>
<span v-if="source.refLabel" class="source-ref-label">{{ source.refLabel }}</span>
<span class="source-book">{{ source.bookTitle }}</span>
<span v-if="source.page" class="source-page">p.&nbsp;{{ source.page }}</span>
<span v-if="source.bookId && source.page" class="source-open-hint"></span>
</div>
</div>
<!-- FIGURE sources -->
<div
v-for="(source, idx) in figureSources"
:key="'fig-' + idx"
class="source-item source-item--figure"
:data-ref-label="source.refLabel"
>
<div
class="source-chip source-chip--figure"
:class="{ 'source-chip--clickable': source.bookId && source.page }"
@click="source.bookId && source.page ? handleOpenSource(source.bookId, source.page) : undefined"
>
<span class="source-icon">🖼</span>
<span v-if="source.refLabel" class="source-ref-label source-ref-label--figure">{{ source.refLabel }}</span>
<span class="source-figure-label">{{ source.label || 'Figure' }}</span>
<span v-if="source.page" class="source-page">p.&nbsp;{{ source.page }}</span>
<span v-if="source.bookId && source.page" class="source-open-hint"></span>
</div>
<div v-if="source.caption" class="source-caption">{{ source.caption }}</div>
<div v-if="source.imageUrl" class="source-figure-image">
<img
:src="source.imageUrl"
:alt="source.caption || source.label || 'Figure'"
class="figure-img"
loading="lazy"
@error="onImageError"
/>
</div>
</div>
</div>
<SourceList
v-if="showSources"
:sources="topicStore.activeSummary.sources"
@open-source="(bookId: string, page: number) => handleOpenSource(bookId, page)"
/>
<BookPagePanel
v-if="readerPanel"
@@ -163,6 +200,77 @@
</div>
</div>
<!-- Concept Report panel -->
<div v-if="mode === 'concept' && topicStore.conceptReportLoading" class="summary-panel card">
<div class="summary-loading">
<div class="spinner spinner-dark" style="width:36px;height:36px;margin:0 auto 1rem;"></div>
<p class="summary-loading-text">Generating facet-organized concept report...</p>
<p class="summary-loading-hint">This may take up to 60 seconds.</p>
</div>
</div>
<div v-else-if="mode === 'concept' && conceptError" class="summary-panel card summary-error">
<h2 class="summary-topic-name">Concept Report Error</h2>
<p class="error-text">{{ conceptError }}</p>
<p v-if="isNoBooks" class="no-books-hint">
Please
<RouterLink to="/">upload and process at least one book</RouterLink>
first.
</p>
</div>
<div v-else-if="mode === 'concept' && !topicStore.activeConceptReport" class="summary-panel card summary-placeholder">
<p class="summary-placeholder-text">
{{ selectedTopicId ? 'Select a saved concept report or generate a new one.' : 'Select a topic to get started.' }}
</p>
</div>
<div v-else-if="mode === 'concept'" class="summary-panel card">
<div class="summary-header">
<h2 class="summary-topic-name">{{ topicStore.activeConceptReport!.topicName }}</h2>
<div class="summary-meta">
<span class="summary-number">
Concept Report #{{ topicStore.activeConceptReport!.reportNumber }}
</span>
<span class="summary-timestamp">{{ formatDate(topicStore.activeConceptReport!.generatedAt) }}</span>
</div>
</div>
<div
v-for="facet in topicStore.activeConceptReport!.facets"
:key="facet.facetKey"
class="concept-facet"
>
<h3 class="concept-facet-title">{{ facet.title }}</h3>
<div class="summary-text summary-text--markdown" v-html="renderFacetMarkdown(facet.markdown)" @click="handleSummaryClick"></div>
</div>
<div ref="sourcesSection" v-if="topicStore.activeConceptReport!.sources.length > 0" class="sources-section">
<button class="sources-toggle" @click="showSources = !showSources">
Sources ({{ topicStore.activeConceptReport!.sources.length }})
<span>{{ showSources ? '▲' : '▼' }}</span>
</button>
<SourceList
v-if="showSources"
:sources="topicStore.activeConceptReport!.sources"
@open-source="(bookId: string, page: number) => handleOpenSource(bookId, page)"
/>
<BookPagePanel
v-if="readerPanel"
:book-id="readerPanel.bookId"
:page="readerPanel.page"
:book-title="readerPanel.bookTitle"
class="reader-panel"
@close="readerPanel = null"
@navigate="(p) => readerPanel && (readerPanel.page = p)"
/>
</div>
<div v-else class="no-sources">
No source citations available for this concept report.
</div>
</div>
<!-- Topic Grid -->
<div class="topic-grid">
<TopicCard
@@ -183,10 +291,11 @@
import { ref, computed, onMounted, inject } from 'vue'
import { marked } from 'marked'
import { RouterLink } from 'vue-router'
import { useTopicStore, type SavedSummaryItem, type SourceReference } from '@/stores/topicStore'
import { useTopicStore, type SavedSummaryItem, type SavedConceptReportItem, type SourceReference } from '@/stores/topicStore'
import { useBookStore } from '@/stores/bookStore'
import TopicCard from '@/components/TopicCard.vue'
import BookPagePanel from '@/components/BookPagePanel.vue'
import SourceList from '@/components/SourceList.vue'
const topicStore = useTopicStore()
const bookStore = useBookStore()
@@ -194,37 +303,52 @@ const showToast = inject<(msg: string, type?: 'error' | 'success') => void>('sho
const showSources = ref(true)
const summaryError = ref<string | null>(null)
const conceptError = ref<string | null>(null)
const isNoBooks = ref(false)
const conceptLanguage = ref<'en' | 'th'>('en')
const summaryLanguage = ref<'en' | 'th'>('en')
const sourcesSection = ref<HTMLElement | null>(null)
const selectedTopicId = ref<string | null>(null)
const mode = ref<'summary' | 'concept'>('summary')
interface ReaderPanel { bookId: string; page: number; bookTitle?: string }
const readerPanel = ref<ReaderPanel | null>(null)
const summaryTopics = computed(() => topicStore.topics.filter(t => t.id !== 'free-form'))
// Text citations of the active summary: typed TEXT, or carrying no type at all.
const textSources = computed(() => {
  const all = topicStore.activeSummary?.sources ?? []
  return all.filter((entry) => !entry.type || entry.type === 'TEXT')
})

// Figure citations of the active summary.
const figureSources = computed(() => {
  const all = topicStore.activeSummary?.sources ?? []
  return all.filter((entry) => entry.type === 'FIGURE')
})
/**
 * `error` handler for figure images: hides the broken image and replaces the
 * parent's content with an "Image unavailable" note.
 */
function onImageError(e: Event) {
  const failedImg = e.target as HTMLImageElement
  failedImg.style.display = 'none'

  const host = failedImg.parentElement
  if (!host) return

  host.innerHTML = '<span class="figure-missing">Image unavailable</span>'
}
/**
 * Replaces the characters `&`, `<`, `>` and `"` with their HTML entities so
 * the string can be embedded in markup or a double-quoted attribute.
 */
function escapeHtml(s: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;'
  }
  return s.replace(/[&<>"]/g, (ch) => entities[ch])
}
/**
 * Builds the HTML for a single citation label such as "S1" or "F2".
 *
 * Every label becomes a `source-ref` badge. When `figureMap` holds an entry
 * with an image URL for the label, an inline `<figure>` (image plus optional
 * caption) is appended after the badge.
 *
 * The result is injected as raw HTML, so every value taken from the source
 * data — including the image URL placed in the `src` attribute — is
 * HTML-escaped before interpolation.
 *
 * @param label     citation label without brackets
 * @param figureMap figure sources keyed by citation label
 * @returns HTML string for the badge, optionally followed by the figure
 */
function renderOneCitation(label: string, figureMap: Map<string, SourceReference>): string {
  const badge = `<span class="source-ref" data-ref="${label}" title="Jump to source ${label}">[${label}]</span>`

  const fig = figureMap.get(label)
  if (!fig?.imageUrl) return badge

  // Escaped so a quote or angle bracket in the URL cannot break out of the attribute.
  const src = escapeHtml(fig.imageUrl)
  const alt = escapeHtml(fig.caption || fig.label || 'Figure')
  const captionText = [fig.label, fig.caption].filter(Boolean).map(escapeHtml).join(' — ')
  const captionHtml = captionText
    ? `<figcaption class="inline-figure-caption">${captionText}</figcaption>`
    : ''

  return `${badge}<figure class="inline-figure"><img src="${src}" alt="${alt}" class="inline-figure-img" loading="lazy" onerror="this.parentElement.style.display='none'" />${captionHtml}</figure>`
}
/**
 * Replaces bracketed citations in rendered HTML with citation markup.
 *
 * Handles well-formed citations ([S1], [F2]) as well as malformed multi-label
 * brackets such as [S26 1], [S1, S2] or [S1 F3]. Each number inside a bracket
 * becomes its own citation; a number without an S/F prefix takes the prefix of
 * the label before it. The rendered citations are joined with single spaces.
 */
function replaceCitations(html: string, figureMap: Map<string, SourceReference>): string {
  const bracketPattern = /\[([SF]\d+(?:[\s,]+[SF]?\d+)*)\]/g

  return html.replace(bracketPattern, (_whole, inner: string) => {
    let currentType: 'S' | 'F' = 'S'
    const rendered: string[] = []

    for (const [, prefix, digits] of inner.matchAll(/([SF]?)(\d+)/g)) {
      if (prefix) {
        currentType = prefix as 'S' | 'F'
      }
      rendered.push(renderOneCitation(`${currentType}${digits}`, figureMap))
    }

    return rendered.join(' ')
  })
}
const renderedSummary = computed(() => {
if (!topicStore.activeSummary) return ''
const html = marked.parse(topicStore.activeSummary.summary) as string
@@ -236,22 +360,7 @@ const renderedSummary = computed(() => {
}
}
return html.replace(/\[(S|F)\d+\]/g, (match) => {
const inner = match.slice(1, -1)
const badge = `<span class="source-ref" data-ref="${inner}" title="Jump to source ${inner}">${match}</span>`
const fig = figureMap.get(inner)
if (fig?.imageUrl) {
const alt = escapeHtml(fig.caption || fig.label || 'Figure')
const captionText = [fig.label, fig.caption].filter(Boolean).map(escapeHtml).join(' — ')
const captionHtml = captionText
? `<figcaption class="inline-figure-caption">${captionText}</figcaption>`
: ''
return `${badge}<figure class="inline-figure"><img src="${fig.imageUrl}" alt="${alt}" class="inline-figure-img" loading="lazy" onerror="this.parentElement.style.display='none'" />${captionHtml}</figure>`
}
return badge
})
return replaceCitations(html, figureMap)
})
function handleSummaryClick(e: MouseEvent) {
@@ -271,12 +380,21 @@ async function handleTopicClick(topicId: string) {
if (selectedTopicId.value !== topicId) {
selectedTopicId.value = topicId
topicStore.activeSummary = null
topicStore.activeConceptReport = null
summaryError.value = null
await topicStore.fetchSummaries(topicId)
// Auto-load the latest summary if any exist
const list = topicStore.summaryList
if (list.length > 0) {
await topicStore.fetchSummaryDetail(topicId, list[list.length - 1].id)
conceptError.value = null
if (mode.value === 'summary') {
await topicStore.fetchSummaries(topicId)
const list = topicStore.summaryList
if (list.length > 0) {
await topicStore.fetchSummaryDetail(topicId, list[list.length - 1].id)
}
} else {
await topicStore.fetchConceptReports(topicId)
const list = topicStore.conceptReportList
if (list.length > 0) {
await topicStore.fetchConceptReportDetail(topicId, list[list.length - 1].id)
}
}
}
}
@@ -287,6 +405,52 @@ async function handleLoadSummary(item: SavedSummaryItem) {
await topicStore.fetchSummaryDetail(selectedTopicId.value, item.id)
}
/**
 * Switches between the 'summary' and 'concept' views and makes sure the newly
 * shown view has data for the currently selected topic.
 *
 * - Always closes the reader panel.
 * - 'concept': refreshes the concept-report list and opens the last entry.
 * - 'summary': if no summary is loaded (handleTopicClick clears it on topic
 *   change and, in concept mode, fetches only concept reports), refreshes the
 *   summary list and opens the last entry. A summary that is already loaded is
 *   left alone.
 *
 * After each await the function bails out if the user has meanwhile switched
 * mode or topic, so a slow response cannot overwrite a newer selection.
 *
 * @param next the mode to activate; a no-op when it is already active
 */
async function setMode(next: 'summary' | 'concept') {
  if (mode.value === next) return
  mode.value = next
  readerPanel.value = null

  const topicId = selectedTopicId.value
  if (!topicId) return

  // True once the user has moved on while a request was in flight.
  const isStale = () => mode.value !== next || selectedTopicId.value !== topicId

  if (next === 'concept') {
    await topicStore.fetchConceptReports(topicId)
    if (isStale()) return
    const list = topicStore.conceptReportList
    if (list.length > 0) {
      await topicStore.fetchConceptReportDetail(topicId, list[list.length - 1].id)
    }
    return
  }

  // Summary mode: only load when nothing is displayed yet.
  if (topicStore.activeSummary) return
  await topicStore.fetchSummaries(topicId)
  if (isStale()) return
  const list = topicStore.summaryList
  if (list.length > 0) {
    await topicStore.fetchSummaryDetail(topicId, list[list.length - 1].id)
  }
}
/**
 * Converts one facet's markdown into HTML and turns its [S#]/[F#] citations
 * into badges. Figure lookups use the FIGURE sources of the active concept
 * report, keyed by refLabel. Returns '' for empty input.
 */
function renderFacetMarkdown(md: string): string {
  if (!md) return ''

  const facetHtml = marked.parse(md) as string

  const figuresByLabel = new Map<string, SourceReference>()
  const reportSources = topicStore.activeConceptReport?.sources ?? []
  reportSources.forEach((source) => {
    // Only figures that carry a reference label can be cited.
    if (source.type !== 'FIGURE' || !source.refLabel) return
    figuresByLabel.set(source.refLabel, source)
  })

  return replaceCitations(facetHtml, figuresByLabel)
}
/**
 * Loads a saved concept report for the selected topic.
 * Does nothing when no topic is selected; otherwise clears the concept error
 * and asks the store for the report's detail.
 */
async function handleLoadConceptReport(item: SavedConceptReportItem) {
  const topicId = selectedTopicId.value
  if (topicId) {
    conceptError.value = null
    await topicStore.fetchConceptReportDetail(topicId, item.id)
  }
}
/**
 * Generates a concept report for the topic in the selected concept language.
 *
 * On success the saved-report list is refreshed. On failure the store's error
 * (or a generic fallback) is stored in conceptError, isNoBooks is set when the
 * message mentions "no books" or "knowledge source", and an error toast is
 * shown if a toast function is available.
 */
async function handleGenerateConcept(topicId: string) {
  conceptError.value = null
  isNoBooks.value = false
  showSources.value = true

  const report = await topicStore.generateConceptReport(topicId, conceptLanguage.value)
  if (report) {
    await topicStore.fetchConceptReports(topicId)
    return
  }

  const message = topicStore.error ?? 'Failed to generate concept report.'
  const lowered = message.toLowerCase()
  conceptError.value = message
  isNoBooks.value = lowered.includes('no books') || lowered.includes('knowledge source')
  showToast?.(message, 'error')
}
onMounted(async () => {
await topicStore.fetchTopics()
if (bookStore.books.length === 0) {
@@ -299,7 +463,7 @@ async function handleGenerate(topicId: string) {
isNoBooks.value = false
showSources.value = true
const result = await topicStore.generateSummary(topicId)
const result = await topicStore.generateSummary(topicId, summaryLanguage.value)
if (!result) {
summaryError.value = topicStore.error ?? 'Failed to generate summary.'
isNoBooks.value =
@@ -349,6 +513,46 @@ function formatDateShort(iso: string): string {
gap: 1rem;
}
/* Mode switch: pill-shaped tabs */
.mode-toggle {
  display: flex;
  gap: 0.5rem;
  margin-bottom: 0.25rem;
}
.mode-tab {
  background: transparent;
  border: 1px solid #cbd5e0;
  color: #4a5568;
  padding: 0.4rem 1rem;
  font-size: 0.9rem;
  font-weight: 500;
  cursor: pointer;
  border-radius: 999px;
}
.mode-tab:hover {
  background: #edf2f7;
}
.mode-tab--active {
  background: #553c9a;
  color: white;
  border-color: #553c9a;
}
/* .mode-tab:hover is more specific than .mode-tab--active, so without this
   rule a hovered active tab would show white text on a light background. */
.mode-tab--active:hover {
  background: #553c9a;
}
/* Concept report: one block per facet, separated by bottom margin */
.concept-facet {
margin-bottom: 1.5rem;
}
/* Facet heading: purple title underlined by a thin divider */
.concept-facet-title {
font-size: 1.1rem;
font-weight: 600;
color: #553c9a;
margin: 0 0 0.5rem 0;
padding-bottom: 0.25rem;
border-bottom: 1px solid #e2e8f0;
}
/* History panel */
.history-panel {
border-top: 3px solid #805ad5;
@@ -362,6 +566,52 @@ function formatDateShort(iso: string): string {
margin-bottom: 0.75rem;
}
/* Row of controls laid out horizontally and vertically centred */
.history-actions {
display: flex;
align-items: center;
gap: 0.5rem;
}
/* Segmented button group; overflow is hidden so child buttons are clipped to the rounded border */
.lang-toggle {
display: inline-flex;
border: 1px solid var(--border-color, #d0d7de);
border-radius: 6px;
overflow: hidden;
}
.lang-toggle-btn {
padding: 0.25rem 0.55rem;
font-size: 0.75rem;
font-weight: 600;
background: transparent;
color: var(--text-secondary, #57606a);
border: none;
cursor: pointer;
line-height: 1;
}
/* Divider between adjacent buttons (every button except the last) */
.lang-toggle-btn:not(:last-child) {
border-right: 1px solid var(--border-color, #d0d7de);
}
/* Hover feedback only for enabled buttons */
.lang-toggle-btn:hover:not(:disabled) {
background: var(--hover-bg, #f3f4f6);
}
.lang-toggle-btn--active {
background: var(--primary-color, #0969da);
color: #fff;
}
/* Repeats the active background so the generic hover rule above does not override it */
.lang-toggle-btn--active:hover:not(:disabled) {
background: var(--primary-color, #0969da);
}
.lang-toggle-btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.history-title {
font-size: 0.875rem;
font-weight: 600;