Skip to content

Commit 06e0de6

Browse files
authored
Multimodality: Upload PDFs and Images (#135)
2 parents f866300 + cb2d216 commit 06e0de6

File tree

10 files changed

+1880
-1147
lines changed

10 files changed

+1880
-1147
lines changed

next.config.mjs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
11
/** @type {import('next').NextConfig} */
2-
const nextConfig = {};
2+
const nextConfig = {
3+
experimental: {
4+
serverActions: {
5+
bodySizeLimit: "10mb",
6+
},
7+
},
8+
};
39

410
export default nextConfig;

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
"tailwind-merge": "^3.0.2",
5454
"tailwindcss-animate": "^1.0.7",
5555
"use-stick-to-bottom": "^1.0.46",
56-
"uuid": "^11.0.5",
56+
"uuid": "^11.1.0",
5757
"zod": "^3.24.2"
5858
},
5959
"devDependencies": {
@@ -64,6 +64,7 @@
6464
"@types/react": "^19.0.8",
6565
"@types/react-dom": "^19.0.3",
6666
"@types/react-syntax-highlighter": "^15.5.13",
67+
"@types/uuid": "^10.0.0",
6768
"autoprefixer": "^10.4.20",
6869
"dotenv": "^16.4.7",
6970
"eslint": "^9.19.0",
@@ -81,8 +82,7 @@
8182
"typescript-eslint": "^8.22.0"
8283
},
8384
"overrides": {
84-
"react-is": "^19.0.0-rc-69d4b800-20241021",
85-
"@langchain/langgraph-checkpoint": "^0.0.16"
85+
"react-is": "^19.0.0-rc-69d4b800-20241021"
8686
},
8787
"packageManager": "[email protected]"
8888
}

pnpm-lock.yaml

Lines changed: 1366 additions & 1134 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
src/components/thread/ContentBlocksPreview.tsx

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import React from "react";
2+
import type { Base64ContentBlock } from "@langchain/core/messages";
3+
import { MultimodalPreview } from "../ui/MultimodalPreview";
4+
import { cn } from "@/lib/utils";
5+
6+
interface ContentBlocksPreviewProps {
7+
blocks: Base64ContentBlock[];
8+
onRemove: (idx: number) => void;
9+
size?: "sm" | "md" | "lg";
10+
className?: string;
11+
}
12+
13+
/**
14+
* Renders a removable preview for each content block; renders nothing when there are no blocks.
15+
* Uses cn utility for robust class merging.
16+
*/
17+
export const ContentBlocksPreview: React.FC<ContentBlocksPreviewProps> = ({
18+
blocks,
19+
onRemove,
20+
size = "md",
21+
className,
22+
}) => {
23+
if (!blocks.length) return null;
24+
return (
25+
<div className={cn("flex flex-wrap gap-2 p-3.5 pb-0", className)}>
26+
{blocks.map((block, idx) => (
27+
<MultimodalPreview
28+
key={idx}
29+
block={block}
30+
removable
31+
onRemove={() => onRemove(idx)}
32+
size={size}
33+
/>
34+
))}
35+
</div>
36+
);
37+
};

src/components/thread/index.tsx

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ import {
2121
PanelRightClose,
2222
SquarePen,
2323
XIcon,
24+
Plus,
25+
CircleX,
2426
} from "lucide-react";
2527
import { useQueryState, parseAsBoolean } from "nuqs";
2628
import { StickToBottom, useStickToBottomContext } from "use-stick-to-bottom";
@@ -36,6 +38,8 @@ import {
3638
TooltipProvider,
3739
TooltipTrigger,
3840
} from "../ui/tooltip";
41+
import { useFileUpload } from "@/hooks/use-file-upload";
42+
import { ContentBlocksPreview } from "./ContentBlocksPreview";
3943
import {
4044
useArtifactOpen,
4145
ArtifactContent,
@@ -122,6 +126,14 @@ export function Thread() {
122126
parseAsBoolean.withDefault(false),
123127
);
124128
const [input, setInput] = useState("");
129+
const {
130+
contentBlocks,
131+
setContentBlocks,
132+
handleFileUpload,
133+
dropRef,
134+
removeBlock,
135+
resetBlocks,
136+
} = useFileUpload();
125137
const [firstTokenReceived, setFirstTokenReceived] = useState(false);
126138
const isLargeScreen = useMediaQuery("(min-width: 1024px)");
127139

@@ -183,13 +195,17 @@ export function Thread() {
183195

184196
const handleSubmit = (e: FormEvent) => {
185197
e.preventDefault();
186-
if (!input.trim() || isLoading) return;
198+
if ((input.trim().length === 0 && contentBlocks.length === 0) || isLoading)
199+
return;
187200
setFirstTokenReceived(false);
188201

189202
const newHumanMessage: Message = {
190203
id: uuidv4(),
191204
type: "human",
192-
content: input,
205+
content: [
206+
...(input.trim().length > 0 ? [{ type: "text", text: input }] : []),
207+
...contentBlocks,
208+
] as Message["content"],
193209
};
194210

195211
const toolMessages = ensureToolCallsHaveResponses(stream.messages);
@@ -214,6 +230,7 @@ export function Thread() {
214230
);
215231

216232
setInput("");
233+
setContentBlocks([]);
217234
};
218235

219236
const handleRegenerate = (
@@ -423,11 +440,18 @@ export function Thread() {
423440

424441
<ScrollToBottom className="animate-in fade-in-0 zoom-in-95 absolute bottom-full left-1/2 mb-4 -translate-x-1/2" />
425442

426-
<div className="bg-muted relative z-10 mx-auto mb-8 w-full max-w-3xl rounded-2xl border shadow-xs">
443+
<div
444+
ref={dropRef}
445+
className="bg-muted relative z-10 mx-auto mb-8 w-full max-w-3xl rounded-2xl border shadow-xs"
446+
>
427447
<form
428448
onSubmit={handleSubmit}
429449
className="mx-auto grid max-w-3xl grid-rows-[1fr_auto] gap-2"
430450
>
451+
<ContentBlocksPreview
452+
blocks={contentBlocks}
453+
onRemove={removeBlock}
454+
/>
431455
<textarea
432456
value={input}
433457
onChange={(e) => setInput(e.target.value)}
@@ -448,7 +472,7 @@ export function Thread() {
448472
className="field-sizing-content resize-none border-none bg-transparent p-3.5 pb-0 shadow-none ring-0 outline-none focus:ring-0 focus:outline-none"
449473
/>
450474

451-
<div className="flex items-center justify-between p-2 pt-4">
475+
<div className="flex items-center gap-6 p-2 pt-4">
452476
<div>
453477
<div className="flex items-center space-x-2">
454478
<Switch
@@ -464,19 +488,40 @@ export function Thread() {
464488
</Label>
465489
</div>
466490
</div>
491+
<Label
492+
htmlFor="file-input"
493+
className="flex cursor-pointer items-center gap-2"
494+
>
495+
<Plus className="size-5 text-gray-600" />
496+
<span className="text-sm text-gray-600">
497+
Upload PDF or Image
498+
</span>
499+
</Label>
500+
<input
501+
id="file-input"
502+
type="file"
503+
onChange={handleFileUpload}
504+
multiple
505+
accept="image/jpeg,image/png,image/gif,image/webp,application/pdf"
506+
className="hidden"
507+
/>
467508
{stream.isLoading ? (
468509
<Button
469510
key="stop"
470511
onClick={() => stream.stop()}
512+
className="ml-auto"
471513
>
472514
<LoaderCircle className="h-4 w-4 animate-spin" />
473515
Cancel
474516
</Button>
475517
) : (
476518
<Button
477519
type="submit"
478-
className="shadow-md transition-all"
479-
disabled={isLoading || !input.trim()}
520+
className="ml-auto shadow-md transition-all"
521+
disabled={
522+
isLoading ||
523+
(!input.trim() && contentBlocks.length === 0)
524+
}
480525
>
481526
Send
482527
</Button>

src/components/thread/messages/human.tsx

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import { getContentString } from "../utils";
55
import { cn } from "@/lib/utils";
66
import { Textarea } from "@/components/ui/textarea";
77
import { BranchSwitcher, CommandBar } from "./shared";
8+
import { MultimodalPreview } from "@/components/ui/MultimodalPreview";
9+
import type { Base64ContentBlock } from "@langchain/core/messages";
810

911
function EditableContent({
1012
value,
@@ -32,6 +34,36 @@ function EditableContent({
3234
);
3335
}
3436

37+
// Type guard for Base64ContentBlock
38+
function isBase64ContentBlock(block: unknown): block is Base64ContentBlock {
39+
if (typeof block !== "object" || block === null || !("type" in block))
40+
return false;
41+
// file type (legacy)
42+
if (
43+
(block as { type: unknown }).type === "file" &&
44+
"source_type" in block &&
45+
(block as { source_type: unknown }).source_type === "base64" &&
46+
"mime_type" in block &&
47+
typeof (block as { mime_type?: unknown }).mime_type === "string" &&
48+
((block as { mime_type: string }).mime_type.startsWith("image/") ||
49+
(block as { mime_type: string }).mime_type === "application/pdf")
50+
) {
51+
return true;
52+
}
53+
// image type (new)
54+
if (
55+
(block as { type: unknown }).type === "image" &&
56+
"source_type" in block &&
57+
(block as { source_type: unknown }).source_type === "base64" &&
58+
"mime_type" in block &&
59+
typeof (block as { mime_type?: unknown }).mime_type === "string" &&
60+
(block as { mime_type: string }).mime_type.startsWith("image/")
61+
) {
62+
return true;
63+
}
64+
return false;
65+
}
66+
3567
export function HumanMessage({
3668
message,
3769
isLoading,
@@ -84,9 +116,34 @@ export function HumanMessage({
84116
onSubmit={handleSubmitEdit}
85117
/>
86118
) : (
87-
<p className="bg-muted ml-auto w-fit rounded-3xl px-4 py-2 whitespace-pre-wrap">
88-
{contentString}
89-
</p>
119+
<div className="flex flex-col gap-2">
120+
{/* Render image and file attachments whenever content is a non-empty array, whether or not text is present */}
121+
{Array.isArray(message.content) && message.content.length > 0 && (
122+
<div className="flex flex-col items-end gap-2">
123+
{message.content.reduce<React.ReactNode[]>(
124+
(acc, block, idx) => {
125+
if (isBase64ContentBlock(block)) {
126+
acc.push(
127+
<MultimodalPreview
128+
key={idx}
129+
block={block}
130+
size="md"
131+
/>,
132+
);
133+
}
134+
return acc;
135+
},
136+
[],
137+
)}
138+
</div>
139+
)}
140+
{/* Render contentString when it is non-empty; nothing is rendered here otherwise (no file/image-name fallback at this level) */}
141+
{contentString ? (
142+
<p className="bg-muted ml-auto w-fit rounded-3xl px-4 py-2 text-right whitespace-pre-wrap">
143+
{contentString}
144+
</p>
145+
) : null}
146+
</div>
90147
)}
91148

92149
<div

src/components/thread/utils.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
import type { Message } from "@langchain/langgraph-sdk";
22

3+
/**
4+
* Extracts a string summary from a message's content, supporting multimodal (text, image, file, etc.).
5+
* - If text is present, returns the joined text.
6+
* - If not, returns a label for the first non-text modality (e.g., 'Image', 'Other').
7+
* - If unknown, returns 'Multimodal message'.
8+
*/
39
export function getContentString(content: Message["content"]): string {
410
if (typeof content === "string") return content;
511
const texts = content

0 commit comments

Comments
 (0)