package com.arms.api.sample;

import java.io.IOException;
import java.util.List;

import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.ExtractedTextFormatter;
import org.springframework.ai.reader.markdown.MarkdownDocumentReader;
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.reader.pdf.ParagraphPdfDocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import org.springframework.stereotype.Component;

/**
 * Reads the PMBOK PDF from the classpath, writes its token-split chunks to the injected
 * {@link VectorStore}, and returns the documents extracted from the PDF.
 */
@Component
@AllArgsConstructor
@Slf4j
public class MyPagePdfDocumentReader {

    /** Spring resource location of the PDF that is ingested. */
    private static final String PDF_LOCATION = "classpath:pmbokpdf/PMBOK_4th_Edition.pdf";

    private final ResourceLoader resourceLoader;
    private final VectorStore vectorStore;

    /**
     * Extracts the PDF at {@link #PDF_LOCATION} with a {@link TikaDocumentReader}, splits the
     * result with a default-configured {@link TokenTextSplitter}, and writes the chunks to the
     * vector store.
     *
     * @return the documents produced by the reader, before splitting
     * @throws IllegalStateException if the PDF resource does not exist
     */
    List<Document> getDocsFromPdf() {
        log.info("start getDocsFromPdf");

        Resource resource = resourceLoader.getResource(PDF_LOCATION);
        // Fail fast with a clear message instead of surfacing a parser error later.
        if (!resource.exists()) {
            throw new IllegalStateException("PDF resource not found: " + PDF_LOCATION);
        }

        TikaDocumentReader pdfReader = new TikaDocumentReader(resource);
        // Read exactly once: the same list feeds the splitter and is returned, so the PDF is
        // not parsed a second time and the caller sees what was actually indexed.
        List<Document> documents = pdfReader.read();

        TokenTextSplitter tokenTextSplitter = new TokenTextSplitter();
        vectorStore.write(tokenTextSplitter.split(documents));

        log.info("end getDocsFromPdf");
        return documents;
    }
}