Ya se pueden transcribir mensajes y recibirlos en el agente

2025-06-06 14:43:42 -06:00
parent 5f8ba127ae
commit 5e5c7cd556
7 changed files with 8629 additions and 2786 deletions
--- a/whatsapp-router/package-lock.json
+++ b/whatsapp-router/package-lock.json
--- a/whatsapp-router/package.json
+++ b/whatsapp-router/package.json
@@ -10,12 +10,16 @@
  },
  "dependencies": {
    "@google/genai": "^1.4.0",
+    "@open-wa/wa-automate": "^4.76.0",
    "axios": "^1.5.0",
    "dotenv": "^16.5.0",
-    "express": "^4.18.2"
+    "express": "^4.18.2",
+    "ffmpeg-static": "^5.2.0",
+    "fluent-ffmpeg": "^2.1.3"
  },
  "devDependencies": {
    "@types/express": "^4.17.21",
+    "@types/fluent-ffmpeg": "^2.1.27",
    "@types/node": "^20.11.19",
    "nodemon": "^3.1.10",
    "ts-node": "^10.9.2",
--- a/whatsapp-router/src/store/conversation.ts
+++ b/whatsapp-router/src/store/conversation.ts
@@ -109,7 +109,7 @@ export async function buildConversation(
    title,
    isGroup,
    unreadCount,
-    participants: Array.from(participantsMap.values()),
+    participants: Array.from(participantsMap.values()), 
    messages,
    createdAt: conversations.get(chatId)?.createdAt || now,
  };
@@ -144,5 +144,6 @@ export async function addMessageToConversation(
      isMe: s.isMe,
    });
  }
+
  return conv;
 }
--- a/whatsapp-router/src/transcribeAudioMessage.ts
+++ b/whatsapp-router/src/transcribeAudioMessage.ts
@@ -0,0 +1,55 @@
+// transcribeAudioMessage.ts
+import { WhatsAppMessage } from './types';
+import { decryptMedia } from '@open-wa/wa-automate';
+import axios from 'axios';
+import { GoogleGenAI, createUserContent } from '@google/genai';
+
+/**
+ * Transcribes a WhatsApp audio message to text with Gemini.
+ *
+ * Steps, in order: read the media URL from the message, download it with
+ * axios, hand the message plus the downloaded payload to `decryptMedia`,
+ * base64-encode the resulting buffer, and send it inline to the
+ * `gemini-2.0-flash` model together with a Spanish transcription prompt.
+ *
+ * The message is rejected (returns `null`) only when all three checks fail:
+ * its `type` is not `'ptt'`, its `type` is not `'audio'`, and its `mimetype`
+ * is not `'audio/ogg; codecs=opus'`. Matching any one of them is enough to
+ * proceed.
+ *
+ * NOTE(review): the inline payload is always labelled `audio/ogg`, although
+ * the guard also admits `type === 'audio'` messages whose `mimetype` is not
+ * checked. Confirm that such messages are always Ogg.
+ * NOTE(review): `GOOGLE_API_KEY` is only validated after the download and
+ * the decryption have already run.
+ *
+ * @param message - Message received from OpenWA.
+ * @returns The trimmed transcript, or `null` when the message is not treated
+ *   as audio or when the model's response text is missing or empty.
+ * @throws Error when the message has neither `clientUrl` nor
+ *   `deprecatedMms3Url`, or when `GOOGLE_API_KEY` is unset or empty. Failures
+ *   from the download, `decryptMedia` or the Gemini call are not caught here
+ *   and propagate to the caller.
+ */
+export async function transcribeAudioMessage(message: WhatsAppMessage): Promise<string | null> {
+  if (
+    message.type !== 'ptt' &&
+    message.type !== 'audio' &&
+    message.mimetype !== 'audio/ogg; codecs=opus'
+  ) {
+    return null;
+  }
+
+  const audioUrl = message.clientUrl || message.deprecatedMms3Url; // prefer clientUrl, fall back to deprecatedMms3Url
+  if (!audioUrl) throw new Error('El mensaje no tiene URL de audio');
+
+  const raw = await axios.get(audioUrl, { responseType: 'arraybuffer' }); // body is requested as an ArrayBuffer, not parsed as text/JSON
+
+  const enrichedMessage = {
+    ...message,
+    _data: {
+      ...message,
+      _raw: raw.data // downloaded payload; presumably what decryptMedia reads - TODO confirm
+    }
+  };
+
+  const decryptedBuffer = await decryptMedia(enrichedMessage as any); // cast skips type checking of the ad-hoc shape built above
+  const base64Audio = decryptedBuffer.toString('base64');
+
+  const apiKey = process.env.GOOGLE_API_KEY;
+  if (!apiKey) throw new Error('Falta GOOGLE_API_KEY');
+
+  const genAI = new GoogleGenAI({ apiKey });
+  const result = await genAI.models.generateContent({
+    model: 'gemini-2.0-flash',
+    contents: createUserContent([
+      {
+        inlineData: {
+          mimeType: 'audio/ogg', // fixed label; the message's own mimetype is not forwarded
+          data: base64Audio
+        }
+      },
+      'Transcribí este audio porfa. te estaran hablando en español honduras.'
+    ])
+  });
+
+  return result.text?.trim() || null; // missing or empty text becomes null
+}
--- a/whatsapp-router/src/webhook.ts
+++ b/whatsapp-router/src/webhook.ts
@@ -4,6 +4,7 @@ import { GoogleGenAI } from '@google/genai';
 import { getHandler } from './chatHandlers';
 import { addMessageToConversation } from './store/conversation';
 import { WhatsAppMessage, Conversation } from './types';
+import { transcribeAudioMessage } from './transcribeAudioMessage';

 export interface WebhookConfig {
  API_URL: string;
@@ -32,6 +33,12 @@ export function registerWebhookRoutes(

    if (message) {
      const origen = from || message.chatId || 'desconocido';
+
+      if(origen == '50493849962@c.us')  //si el mensajes es de un agente, no lo proceses
+      {
+        return res.sendStatus(200);
+      }
+
      console.log(`📩 Mensaje recibido (${message.text}) de ${origen}`);
    }

@@ -41,6 +48,8 @@ export function registerWebhookRoutes(
      const chatId = message.chatId || from;

      // Audio message handling
+      // console.log(message);
+      
      if (
        message.type === 'ptt' &&
        message.mimetype === 'audio/ogg; codecs=opus'
@@ -53,33 +62,12 @@ export function registerWebhookRoutes(
        }
        console.log('🎤 Mensaje de audio detectado', audioUrl);
        try {
-          // Download audio using the /downloadFileWithCredentials endpoint
-          const audioResponse = await axios.post(`${openWaUrl}/downloadFileWithCredentials`, {
-            args: { url: audioUrl },
-          });
-          const audioBase64 = audioResponse.data; // This is already a base64 string
-
-          const apiKey = process.env.GOOGLE_API_KEY;
-          if (!apiKey) {
-            throw new Error('GOOGLE_API_KEY is not set');
-          }
-          const genAI = new GoogleGenAI({ apiKey });
-
-          // Corrected Gemini API call structure
-          const result = await genAI.models.generateContent({
-            model: 'gemini-pro', // Ensure this model supports inline audio or use appropriate one
-            contents: [
-              { inlineData: { mimeType: 'audio/ogg', data: audioBase64 } },
-              { text: 'Generate a transcript of the speech.' },
-            ],
-          });
-          // result directly is GenerateContentResponse
-          const transcript = result.text; // Use the getter for text
-          if (transcript === undefined) {
-            throw new Error('Transcription resulted in undefined text.');
-          }
+          const transcript = await transcribeAudioMessage(message);
          console.log('📝 Transcripción:', transcript);
-          message.body = transcript;
+          message.body = transcript || '';
+          message.text = transcript || '';
+
+          
        } catch (transcriptionError: any) {
          console.error('Error en la transcripción:', transcriptionError.message);
          const reply =
@@ -90,6 +78,7 @@ export function registerWebhookRoutes(
        }
      }

+      console.log(message);
      let conv: Conversation | undefined;
      if (chatId) {
        try {