feat: Vision LLM and Device Camera API Example (#1352)

YasharF · web-flow · commit 8f4a6b72d4fc · 2025-04-22T04:50:51.000-07:00
diff --git a/.env.example b/.env.example
@@ -76,6 +76,7 @@ STRIPE_PKEY=pk_test_6pRNASCoBOKtIshFeQd4XMUh
 
 TOGETHERAI_API_KEY=sample-api-key
 TOGETHERAI_MODEL=meta-llama/Llama-3.3-70B-Instruct-Turbo-Free
+TOGETHERAI_VISION_MODEL=meta-llama/Llama-Vision-Free
 
 TRAKT_ID=trakt-client-id
 TRAKT_SECRET=trackt-client-secret
diff --git a/README.md b/README.md
@@ -82,9 +82,10 @@ I also tried to make it as **generic** and **reusable** as possible to cover mos
   - Delete Account
 - Contact Form (powered by SMTP via Sendgrid, Mailgun, AWS SES, etc.)
 - File upload
+- Device camera
 - **API Examples**
 
-  - **AI:** OpenAI Moderation, Together AI foundational model LLMs (aka Deepseek, Llama, Mistral, etc.)
+  - **AI:** OpenAI Moderation, Llama Instruct, Llama Vision (via Together AI serverless foundation models such as DeepSeek, Llama, Mistral, etc.)
   - **Backoffice:** Lob (USPS Mail), Paypal, Quickbooks, Stripe, Twilio (text messaging)
   - **Data, Media & Entertainment:** Alpha Vantage (stocks and finance info) with ChartJS, Github, Foursquare, Last.fm, New York Times, Trakt.tv (movies/TV), Twitch, Tumblr (OAuth 1.0a example), Web Scraping
   - **Maps and Location:** Google Maps, HERE Maps
@@ -108,7 +109,7 @@ I also tried to make it as **generic** and **reusable** as possible to cover mos
   - Hosted: No need to install, see the MongoDB Atlas section
 
 - [Node.js 22.12+](http://nodejs.org)
-  - Highly recommended: Use/Upgrade your NodeJS to the latest NodeJS 22 LTS version.
+  - Highly recommended: Use/Upgrade your Node.js to the latest Node.js 22 LTS version.
 - Command Line Tools
 - <img src="https://upload.wikimedia.org/wikipedia/commons/1/1b/Apple_logo_grey.svg" height="17">&nbsp;**Mac OS X:** [Xcode](https://itunes.apple.com/us/app/xcode/id497799835?mt=12) (or **OS X 10.9+**: `xcode-select --install`)
 - <img src="https://upload.wikimedia.org/wikipedia/commons/8/87/Windows_logo_-_2021.svg" height="17">&nbsp;**Windows:** [Visual Studio Code](https://code.visualstudio.com) + [Windows Subsystem for Linux - Ubuntu](https://learn.microsoft.com/en-us/windows/wsl/install) OR [Visual Studio](https://www.visualstudio.com/products/visual-studio-community-vs)
@@ -1023,7 +1024,7 @@ You now have a choice - to include your JavaScript code in Pug templates or have
 
 But it's also understandable if you want to take the easier road. Most of the time you don't even care about performance during hackathons, you just want to _"get shit done"_ before the time runs out. Well, either way, use whichever approach makes more sense to you. At the end of the day, it's **what** you build that matters, not **how** you build it.
 
-If you want to stick all your JavaScript inside templates, then in `layout.pug` - your main template file, add this to `head` block.
+If you want to stick all your JavaScript inside templates, then in `layout.pug` - your main template file, add this to the `head` block.
 
 ```pug
 script(src='/socket.io/socket.io.js')
diff --git a/app.js b/app.js
@@ -28,7 +28,7 @@ const secureTransfer = process.env.BASE_URL.startsWith('https');
  * Rate limiting configuration
  * This is a basic rate limiting configuration. You may want to adjust the settings
  * based on your application's needs and the expected traffic patterns.
- * Alos, consider adding a proxy such as cloudflare for production.
+ * Also, consider adding a proxy such as cloudflare for production.
  */
 // Global Rate Limiter Config
 const limiter = rateLimit({
@@ -124,8 +124,10 @@ app.use(passport.initialize());
 app.use(passport.session());
 app.use(flash());
 app.use((req, res, next) => {
-  if (req.path === '/api/upload') {
+  if (req.path === '/api/upload' || req.path === '/api/togetherai-camera') {
     // Multer multipart/form-data handling needs to occur before the Lusca CSRF check.
+    // WARN: Any path exempted from the CSRF check here must chain lusca.csrf() in its own
+    // route handler, after the multipart body has been parsed.
     next();
   } else {
     lusca.csrf()(req, res, next);
@@ -233,6 +235,8 @@ app.get('/api/openai-moderation', apiController.getOpenAIModeration);
 app.post('/api/openai-moderation', apiController.postOpenAIModeration);
 app.get('/api/togetherai-classifier', apiController.getTogetherAIClassifier);
 app.post('/api/togetherai-classifier', apiController.postTogetherAIClassifier);
+app.get('/api/togetherai-camera', lusca({ csrf: true }), apiController.getTogetherAICamera);
+app.post('/api/togetherai-camera', strictLimiter, apiController.imageUploadMiddleware, lusca({ csrf: true }), apiController.postTogetherAICamera);
 
 /**
  * OAuth authentication failure handler (common for all providers)
diff --git a/controllers/api.js b/controllers/api.js
@@ -1495,6 +1495,178 @@ exports.postOpenAIModeration = async (req, res) => {
   });
 };
 
+/**
+ * Helper functions and constants for Together AI API Example
+ * We are using LLMs to classify text or analyze a picture taken by the user's camera.
+ */
+
+// Shared Together AI API caller
+const callTogetherAiApi = async (apiRequestBody, apiKey) => {
+  const response = await fetch('https://api.together.xyz/v1/chat/completions', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Authorization: `Bearer ${apiKey}`,
+    },
+    body: JSON.stringify(apiRequestBody),
+  });
+  if (!response.ok) {
+    const errData = await response.json().catch(() => ({}));
+    console.error('Together AI API Error Response:', errData);
+    const errorMessage = errData.error && errData.error.message ? errData.error.message : `API Error: ${response.status}`;
+    throw new Error(errorMessage);
+  }
+  return response.json();
+};
+
+// Vision-specific functions
+const createVisionLLMRequestBody = (dataUrl, model) => ({
+  model,
+  messages: [
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: 'What is in this image?',
+        },
+        {
+          type: 'image_url',
+          image_url: {
+            url: dataUrl,
+          },
+        },
+      ],
+    },
+  ],
+});
+
+const extractVisionAnalysis = (data) => {
+  if (data.choices && Array.isArray(data.choices) && data.choices.length > 0 && data.choices[0].message && data.choices[0].message.content) {
+    return data.choices[0].message.content;
+  }
+  return 'No vision analysis available';
+};
+
+// Classifier-specific functions
+const createClassifierLLMRequestBody = (inputText, model, systemPrompt) => ({
+  model,
+  messages: [
+    { role: 'system', content: systemPrompt },
+    { role: 'user', content: inputText },
+  ],
+  temperature: 0,
+  max_tokens: 64,
+});
+
+const extractClassifierResponse = (content) => {
+  let department = null;
+  if (content) {
+    try {
+      // Try to extract JSON from the response
+      const jsonStringMatch = content.match(/{.*}/s);
+      if (jsonStringMatch) {
+        const parsed = JSON.parse(jsonStringMatch[0].replace(/'/g, '"'));
+        department = parsed.department;
+      }
+    } catch (err) {
+      console.log('Failed to parse JSON from TogetherAI API response:', err);
+      // fallback: try to extract department manually
+      const match = content.match(/"department"\s*:\s*"([^"]+)"/);
+      if (match) {
+        [, department] = match;
+      }
+    }
+  }
+  return department || 'Unknown';
+};
+
+// System prompt for the classifier.
+// It instructs the LLM to pick one department for the customer message and to reply
+// in the JSON structure shown at the end of the prompt.
+const messageClassifierSystemPrompt = `You are a customer service classifier for an e-commerce platform. Your role is to identify the primary issue described by the customer and return the result in JSON format. Carefully analyze the customer's message and select one of the following departments as the classification result:
+
+Order Tracking and Status
+Returns and Refunds
+Payments and Billing Issues
+Account Management
+Product Inquiries
+Technical Support
+Shipping and Delivery Issues
+Promotions and Discounts
+Marketplace Seller Support
+Feedback and Complaints
+
+Provide the output in this JSON structure:
+
+{
+  "department": "<selected_department>"
+}
+Replace <selected_department> with the name of the most relevant department from the list above. If the inquiry spans multiple categories, choose the department that is most likely to address the customer's issue promptly and effectively.`;
+
+// Image upload middleware for camera uploads (in-memory storage, 10MB limit)
+const createImageUploader = () => {
+  const memoryStorage = multer.memoryStorage();
+  return multer({
+    storage: memoryStorage,
+    limits: { fileSize: 10 * 1024 * 1024 }, // 10MB limit
+  }).single('image');
+};
+
+// Parses the multipart "image" field into req.file (in memory) ahead of the route handler.
+exports.imageUploadMiddleware = (req, res, next) => {
+  createImageUploader()(req, res, (err) => {
+    if (!err) return next();
+    console.error('Upload error:', err);
+    // Multer's own errors (e.g. oversized file) are the client's fault: answer 400, not 500.
+    const status = err instanceof multer.MulterError ? 400 : 500;
+    return res.status(status).json({ error: err.message });
+  });
+};
+
+const createImageDataUrl = (file) => {
+  const base64Image = file.buffer.toString('base64');
+  return `data:${file.mimetype};base64,${base64Image}`;
+};
+
+/**
+ * GET /api/togetherai-camera
+ * Together AI Camera Analysis Example
+ */
+exports.getTogetherAICamera = (req, res) => {
+  res.render('api/togetherai-camera', {
+    title: 'Together.ai Camera Analysis',
+    togetherAiModel: process.env.TOGETHERAI_VISION_MODEL,
+  });
+};
+
+/**
+ * POST /api/togetherai-camera
+ * Analyze the uploaded image (req.file) with the Together AI vision model; replies with JSON.
+ */
+exports.postTogetherAICamera = async (req, res) => {
+  if (!req.file) {
+    return res.status(400).json({ error: 'No image provided' });
+  }
+  try {
+    const togetherAiKey = process.env.TOGETHERAI_API_KEY;
+    const togetherAiModel = process.env.TOGETHERAI_VISION_MODEL;
+    if (!togetherAiKey) {
+      return res.status(500).json({ error: 'TogetherAI API key is not set' });
+    }
+    // An unset model is dropped by JSON.stringify; fail here with a clear message instead.
+    if (!togetherAiModel) {
+      return res.status(500).json({ error: 'TogetherAI vision model is not set' });
+    }
+    const dataUrl = createImageDataUrl(req.file);
+    const apiRequestBody = createVisionLLMRequestBody(dataUrl, togetherAiModel);
+    const data = await callTogetherAiApi(apiRequestBody, togetherAiKey);
+    res.json({ analysis: extractVisionAnalysis(data) });
+  } catch (error) {
+    console.error('Error analyzing image:', error);
+    res.status(500).json({ error: `Error analyzing image: ${error.message}` });
+  }
+};
+
 /**
  * GET /api/togetherai-classifier
  * Together AI / LLM API Example.
@@ -1503,6 +1675,7 @@ exports.getTogetherAIClassifier = (req, res) => {
   res.render('api/togetherai-classifier', {
     title: 'Together.ai/LLM Department Classifier',
     result: null,
+    togetherAiModel: process.env.TOGETHERAI_MODEL,
     error: null,
     input: '',
   });
@@ -1522,82 +1695,24 @@ exports.postTogetherAIClassifier = async (req, res) => {
   const inputText = (req.body.inputText || '').slice(0, 300);
   let result = null;
   let error = null;
-
   if (!togetherAiKey) {
     error = 'TogetherAI API key is not set in environment variables.';
   } else if (!togetherAiModel) {
     error = 'TogetherAI model is not set in environment variables.';
   } else if (!inputText.trim()) {
-    error = 'Please enter a message to classify.';
+    error = 'Please enter the customer message to classify.';
   } else {
     try {
-      const systemPrompt = `You are a customer service classifier for an e-commerce platform. Your role is to identify the primary issue described by the customer and return the result in JSON format. Carefully analyze the customer's message and select one of the following departments as the classification result:
-
-Order Tracking and Status
-Returns and Refunds
-Payments and Billing Issues
-Account Management
-Product Inquiries
-Technical Support
-Shipping and Delivery Issues
-Promotions and Discounts
-Marketplace Seller Support
-Feedback and Complaints
-
-Provide the output in this JSON structure:
-
-{
-  "department": "<selected_department>"
-}
-Replace <selected_department> with the name of the most relevant department from the list above. If the inquiry spans multiple categories, choose the department that is most likely to address the customer's issue promptly and effectively.`;
-
-      const response = await fetch('https://api.together.xyz/v1/chat/completions', {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          Authorization: `Bearer ${togetherAiKey}`,
-        },
-        body: JSON.stringify({
-          model: togetherAiModel,
-          messages: [
-            { role: 'system', content: systemPrompt },
-            { role: 'user', content: inputText },
-          ],
-          temperature: 0,
-          max_tokens: 64,
-        }),
-      });
-
-      if (!response.ok) {
-        const errData = await response.json().catch(() => ({}));
-        error = errData.error && errData.error.message ? errData.error.message : `API Error: ${response.status}`;
-      } else {
-        const data = await response.json();
-        const content = data.choices && data.choices[0] && data.choices[0].message && data.choices[0].message.content;
-        let department = null;
-        if (content) {
-          try {
-            // Try to extract JSON from the response
-            const jsonStringMatch = content.match(/{.*}/s);
-            if (jsonStringMatch) {
-              const parsed = JSON.parse(jsonStringMatch[0].replace(/'/g, '"'));
-              department = parsed.department;
-            }
-          } catch (err) {
-            console.log('Failed to parse JSON from TogetherAI API response:', err);
-            // fallback: try to extract department manually
-            const match = content.match(/"department"\s*:\s*"([^"]+)"/);
-            if (match) {
-              [, department] = match;
-            }
-          }
-        }
-        result = {
-          department: department || 'Unknown',
-          raw: content,
-          systemPrompt, // Send the sysetemPrompt to the front-end for this demo, not actual production applications.
-        };
-      }
+      const systemPrompt = messageClassifierSystemPrompt; // Returned to the view for demo purposes only
+      const apiRequestBody = createClassifierLLMRequestBody(inputText, togetherAiModel, systemPrompt);
+      const data = await callTogetherAiApi(apiRequestBody, togetherAiKey);
+      const content = data.choices && data.choices[0] && data.choices[0].message && data.choices[0].message.content;
+      const department = extractClassifierResponse(content);
+      result = {
+        department,
+        raw: content,
+        systemPrompt,
+      };
     } catch (err) {
       console.log('TogetherAI Classifier API Error:', err);
       error = 'Failed to call TogetherAI API.';
diff --git a/views/api/index.pug b/views/api/index.pug
@@ -141,5 +141,11 @@ block content
       a(href='/api/togetherai-classifier', style='color: #000')
         .card.mb-3(style='background-color: rgb(128, 181, 255)')
           .card-body
-            img(src='https://i.imgur.com/dOCkJxT.png', height=40, style='padding: 0px 10px 0px 0px')
-            | Together AI - one-shot LLM
+            img(src='https://i.imgur.com/dOCkJxT.png', height=40, width=100, style='padding-right: 5px; object-fit: contain')
+            | Llama Instruct
+    .col-md-4
+      a(href='/api/togetherai-camera', style='color: #000')
+        .card.mb-3(style='background-color: rgb(128, 181, 255)')
+          .card-body
+            img(src='https://i.imgur.com/dOCkJxT.png', height=40, width=100, style='padding-right: 5px; object-fit: contain')
+            | Llama Vision + Camera
diff --git a/views/api/togetherai-camera.pug b/views/api/togetherai-camera.pug
diff --git a/views/api/togetherai-classifier.pug b/views/api/togetherai-classifier.pug