implementing pdf upload, text blob pdf-parse

This commit is contained in:
starmorph
2025-05-15 15:41:18 -07:00
parent d8c8a85f90
commit 5d86187d59
5 changed files with 2995 additions and 1161 deletions

View File

@@ -1,4 +1,10 @@
/** @type {import('next').NextConfig} */ /** @type {import('next').NextConfig} */
const nextConfig = {}; const nextConfig = {
experimental: {
serverActions: {
bodySizeLimit: '10mb',
},
},
};
export default nextConfig; export default nextConfig;

View File

@@ -19,6 +19,7 @@
"format:check": "prettier --check ." "format:check": "prettier --check ."
}, },
"dependencies": { "dependencies": {
"@langchain/community": "^0.3.42",
"@langchain/core": "^0.3.44", "@langchain/core": "^0.3.44",
"@langchain/langgraph": "^0.2.63", "@langchain/langgraph": "^0.2.63",
"@langchain/langgraph-sdk": "^0.0.66", "@langchain/langgraph-sdk": "^0.0.66",
@@ -41,6 +42,7 @@
"lucide-react": "^0.476.0", "lucide-react": "^0.476.0",
"next-themes": "^0.4.4", "next-themes": "^0.4.4",
"nuqs": "^2.4.1", "nuqs": "^2.4.1",
"pdfjs-dist": "^5.2.133",
"react": "^19.0.0", "react": "^19.0.0",
"react-dom": "^19.0.0", "react-dom": "^19.0.0",
"react-markdown": "^10.0.1", "react-markdown": "^10.0.1",
@@ -61,6 +63,7 @@
"@tailwindcss/postcss": "^4.0.13", "@tailwindcss/postcss": "^4.0.13",
"@types/lodash": "^4.17.16", "@types/lodash": "^4.17.16",
"@types/node": "^22.13.5", "@types/node": "^22.13.5",
"@types/pdf-parse": "^1.1.5",
"@types/react": "^19.0.8", "@types/react": "^19.0.8",
"@types/react-dom": "^19.0.3", "@types/react-dom": "^19.0.3",
"@types/react-syntax-highlighter": "^15.5.13", "@types/react-syntax-highlighter": "^15.5.13",
@@ -72,6 +75,7 @@
"eslint-plugin-react-refresh": "^0.4.18", "eslint-plugin-react-refresh": "^0.4.18",
"globals": "^15.14.0", "globals": "^15.14.0",
"next": "^15.2.3", "next": "^15.2.3",
"pdf-parse": "^1.1.1",
"postcss": "^8.5.3", "postcss": "^8.5.3",
"prettier": "^3.5.3", "prettier": "^3.5.3",
"prettier-plugin-tailwindcss": "^0.6.11", "prettier-plugin-tailwindcss": "^0.6.11",

4042
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -37,13 +37,22 @@ import {
TooltipProvider, TooltipProvider,
TooltipTrigger, TooltipTrigger,
} from "../ui/tooltip"; } from "../ui/tooltip";
import { MessageContentImageUrl } from "@langchain/core/messages"; import { MessageContentImageUrl, MessageContentText } from "@langchain/core/messages";
import { extractPdfText } from "@/lib/pdf";
interface MessageContentImageUrlWrapper { interface MessageContentImageUrlWrapper {
id: string; id: string;
image: MessageContentImageUrl; image: MessageContentImageUrl;
} }
/**
 * Client-side wrapper for one uploaded PDF kept in the `pdfUrlList` state.
 * Pairs the text content block that is sent with the message with the
 * metadata needed to show and remove the attachment in the composer.
 */
interface MessageContentPdfWrapper {
// Unique identifier generated with uuidv4() when the entry is created; used as the React key and to remove the entry.
id: string;
// Text content block for the PDF; this is the part spread into the outgoing message content.
pdf: MessageContentText;
// Original file name (taken from File.name), displayed in the attachment chip.
name: string;
}
function StickyToBottomContent(props: { function StickyToBottomContent(props: {
content: ReactNode; content: ReactNode;
footer?: ReactNode; footer?: ReactNode;
@@ -123,6 +132,9 @@ export function Thread() {
const [imageUrlList, setImageUrlList] = useState<MessageContentImageUrlWrapper[]>( const [imageUrlList, setImageUrlList] = useState<MessageContentImageUrlWrapper[]>(
[], [],
); );
const [pdfUrlList, setPdfUrlList] = useState<MessageContentPdfWrapper[]>(
[],
);
const [firstTokenReceived, setFirstTokenReceived] = useState(false); const [firstTokenReceived, setFirstTokenReceived] = useState(false);
const isLargeScreen = useMediaQuery("(min-width: 1024px)"); const isLargeScreen = useMediaQuery("(min-width: 1024px)");
@@ -190,7 +202,7 @@ export function Thread() {
text: input, text: input,
}, },
...imageUrlList.map((item) => item.image), ...imageUrlList.map((item) => item.image),
...pdfUrlList.map((item) => item.pdf),
], ],
}; };
@@ -242,6 +254,28 @@ export function Thread() {
e.target.value = ""; e.target.value = "";
}; };
/**
 * Handles files chosen through the hidden PDF file input.
 *
 * Extracts the text of every selected file with `extractPdfText`, wraps each
 * result with a fresh id and the file name, and appends the wrappers to
 * `pdfUrlList`. The input is always reset afterwards so the same file can be
 * selected again.
 *
 * @param e - change event fired by the `<input type="file">` element.
 */
const handlePDFUpload = async (e: ChangeEvent<HTMLInputElement>) => {
  // Capture the element and copy the FileList before any await: the list is
  // live and is emptied once the input value is reset.
  const inputEl = e.target;
  const selectedFiles = Array.from(inputEl.files ?? []);
  if (selectedFiles.length === 0) {
    inputEl.value = "";
    return;
  }

  try {
    const pdfTexts: MessageContentPdfWrapper[] = await Promise.all(
      selectedFiles.map(async (file) => {
        const pdf = await extractPdfText(file);
        return {
          id: uuidv4(),
          pdf,
          name: file.name,
        };
      }),
    );
    // Functional update: extraction is asynchronous, so the `pdfUrlList`
    // captured by this closure may be stale by now (entries added or removed
    // while parsing would otherwise be lost).
    setPdfUrlList((prev) => [...prev, ...pdfTexts]);
  } catch (err: unknown) {
    // Surface extraction failures instead of leaving an unhandled rejection.
    console.error("Failed to extract text from uploaded PDF", err);
    toast.error("Failed to read one or more PDF files. Please try again.");
  } finally {
    inputEl.value = "";
  }
};
const handleRegenerate = ( const handleRegenerate = (
parentCheckpoint: Checkpoint | null | undefined, parentCheckpoint: Checkpoint | null | undefined,
) => { ) => {
@@ -282,6 +316,11 @@ export function Thread() {
toast.error("You have uploaded invalid file type. Please upload an image or a PDF."); toast.error("You have uploaded invalid file type. Please upload an image or a PDF.");
} }
/**
* If there are any image files in the dropped files, this block reads each image file as a data URL,
* wraps it in a MessageContentImageUrl object, and updates the imageUrlList state with the new images.
* This enables preview and later sending of uploaded images in the chat UI.
*/
if (imageFiles.length) { if (imageFiles.length) {
const imageUrls = await Promise.all( const imageUrls = await Promise.all(
Array.from(imageFiles).map((file) => { Array.from(imageFiles).map((file) => {
@@ -305,6 +344,20 @@ export function Thread() {
})); }));
setImageUrlList([...imageUrlList, ...wrappedImages]); setImageUrlList([...imageUrlList, ...wrappedImages]);
} }
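/**
* If there are any PDF files in the dropped files, this block appends one entry per PDF to pdfUrlList
* (a new id, the file name, and an empty text placeholder) so the file name is listed above the input area.
* NOTE: unlike the file-input upload path, no text is extracted from the PDF here, so the attached text is empty.
*/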
if (files.some(file => file.type === "application/pdf")) {
const pdfFiles = files.filter(file => file.type === "application/pdf");
const pdfPreviews = pdfFiles.map((file) => ({
id: uuidv4(),
pdf: { type: 'text' as const, text: '' },
name: file.name,
}));
setPdfUrlList([...pdfUrlList, ...pdfPreviews]);
}
}; };
const handleDragEnter = (e: DragEvent) => { const handleDragEnter = (e: DragEvent) => {
@@ -555,6 +608,19 @@ export function Thread() {
})} })}
</div> </div>
)} )}
{pdfUrlList.length > 0 && (
<div className="flex flex-wrap gap-2 p-3.5 pb-0 ">
{pdfUrlList.map((pdf) => (
<div className="relative flex items-center gap-2 bg-gray-100 rounded px-2 py-1 border-1 border-teal-700 bg-teal-900 text-white rounded-md px-2 py-2" key={pdf.id}>
<span className=" truncate max-w-xs text-sm">{pdf.name}</span>
<CircleX
className="size-4 cursor-pointer text-teal-600 hover:text-teal-500"
onClick={() => setPdfUrlList(pdfUrlList.filter((p) => p.id !== pdf.id))}
/>
</div>
))}
</div>
)}
<textarea <textarea
value={input} value={input}
onChange={(e) => setInput(e.target.value)} onChange={(e) => setInput(e.target.value)}
@@ -583,15 +649,15 @@ export function Thread() {
> >
<Plus className="size-5 text-gray-600" /> <Plus className="size-5 text-gray-600" />
<span className="text-sm text-gray-600"> <span className="text-sm text-gray-600">
Upload Images Upload PDF
</span> </span>
</Label> </Label>
<input <input
id="file-input" id="file-input"
type="file" type="file"
onChange={handleImageUpload} onChange={handlePDFUpload}
multiple multiple
accept="image/*" accept="application/pdf"
className="hidden" className="hidden"
/> />
<div className="flex items-center space-x-2"> <div className="flex items-center space-x-2">

24
src/lib/pdf.ts Normal file
View File

@@ -0,0 +1,24 @@
"use server"
import { MessageContentText } from "@langchain/core/messages";
import { WebPDFLoader } from "@langchain/community/document_loaders/web/pdf";
// import { Base64ContentBlock } from "@langchain/core/messages";
// TODO: replace the local interface below with the import above once it can be used.
// NOTE(review): this interface is not referenced by any code visible in this file.
/**
 * Local stand-in for the `Base64ContentBlock` type named in the commented-out import above.
 */
interface Base64ContentBlock {
// Presumably the base64-encoded payload — confirm against the upstream type.
data: string;
// Optional free-form metadata attached to the block.
metadata?: Record<string, unknown>;
// Optional MIME type of the payload (presumably e.g. "application/pdf" — confirm).
mime_type?: string;
// Literal tag; only "base64" is allowed by this declaration.
source_type: "base64";
// Kind of content carried by the block.
type: "image" | "audio" | "file";
}
/**
 * Extracts the text of a PDF file and wraps it as a LangChain text content block.
 *
 * The file is loaded with `WebPDFLoader` using `splitPages: false`, and the
 * `pageContent` of every returned document is joined into a single string.
 *
 * @param file - the PDF to read.
 * @returns a `{ type: "text", text }` content block; `text` is an empty string
 *   when the loader yields no documents.
 * @throws whatever `WebPDFLoader.load()` rejects with (e.g. an unreadable file).
 */
export const extractPdfText = async (file: File): Promise<MessageContentText> => {
  const loader = new WebPDFLoader(file, { splitPages: false });
  const docs = await loader.load();
  // Do not index docs[0] directly: the loader can return an empty array
  // (e.g. a PDF with no extractable text), which would throw a TypeError.
  // Joining also keeps all content should more than one document come back.
  const text = docs.map((doc) => doc.pageContent).join("\n\n");
  return {
    type: "text",
    text,
  };
};