Ya se pueden transcribir mensajes de audio y recibirlos en el agent

This commit is contained in:
2025-06-06 14:43:42 -06:00
parent 5f8ba127ae
commit 5e5c7cd556
7 changed files with 8629 additions and 2786 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -10,12 +10,16 @@
},
"dependencies": {
"@google/genai": "^1.4.0",
"@open-wa/wa-automate": "^4.76.0",
"axios": "^1.5.0",
"dotenv": "^16.5.0",
"express": "^4.18.2"
"express": "^4.18.2",
"ffmpeg-static": "^5.2.0",
"fluent-ffmpeg": "^2.1.3"
},
"devDependencies": {
"@types/express": "^4.17.21",
"@types/fluent-ffmpeg": "^2.1.27",
"@types/node": "^20.11.19",
"nodemon": "^3.1.10",
"ts-node": "^10.9.2",

View File

@@ -109,7 +109,7 @@ export async function buildConversation(
title,
isGroup,
unreadCount,
participants: Array.from(participantsMap.values()),
participants: Array.from(participantsMap.values()),
messages,
createdAt: conversations.get(chatId)?.createdAt || now,
};
@@ -144,5 +144,6 @@ export async function addMessageToConversation(
isMe: s.isMe,
});
}
return conv;
}

View File

@@ -0,0 +1,55 @@
// transcribeAudioMessage.ts
import { WhatsAppMessage } from './types';
import { decryptMedia } from '@open-wa/wa-automate';
import axios from 'axios';
import { GoogleGenAI, createUserContent } from '@google/genai';
/**
 * Transcribes a WhatsApp audio message using Gemini.
 *
 * A message is handled when its `type` is `ptt` or `audio`, or when its
 * `mimetype` is `audio/ogg; codecs=opus`; anything else is ignored.
 * The media is downloaded from the message URL, decrypted with OpenWA's
 * `decryptMedia`, base64-encoded and sent inline to the
 * `gemini-2.0-flash` model together with a Spanish transcription prompt.
 *
 * @param message - Message received from OpenWA.
 * @returns The trimmed transcript, or `null` when the message is not an
 *   audio message or the model returned no text.
 * @throws Error when the message has no media URL or when the
 *   `GOOGLE_API_KEY` environment variable is not set. Failures from the
 *   download, the decryption or the Gemini call are not caught here and
 *   propagate to the caller.
 */
export async function transcribeAudioMessage(message: WhatsAppMessage): Promise<string | null> {
  const isAudio =
    message.type === 'ptt' ||
    message.type === 'audio' ||
    message.mimetype === 'audio/ogg; codecs=opus';
  if (!isAudio) {
    return null;
  }

  const audioUrl = message.clientUrl || message.deprecatedMms3Url;
  if (!audioUrl) throw new Error('El mensaje no tiene URL de audio');

  // Validate configuration before any network work so a missing key fails fast
  // instead of after a wasted download and decryption.
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) throw new Error('Falta GOOGLE_API_KEY');

  const raw = await axios.get(audioUrl, { responseType: 'arraybuffer' });

  // The downloaded bytes are attached under `_data._raw` before decrypting.
  // NOTE(review): presumably decryptMedia reads the payload from there —
  // confirm against the OpenWA version in use.
  const enrichedMessage = {
    ...message,
    _data: {
      ...message,
      _raw: raw.data,
    },
  };
  // The enriched object does not match OpenWA's message type, hence the cast.
  const decryptedBuffer = await decryptMedia(enrichedMessage as any);
  const base64Audio = decryptedBuffer.toString('base64');

  // Report the container the message actually declares (without parameters
  // such as "; codecs=opus") rather than always claiming OGG; `audio` messages
  // are accepted above and may not be OGG. Falls back to the previous
  // hard-coded value when the message declares no mimetype.
  const mimeType = message.mimetype?.split(';')[0]?.trim() || 'audio/ogg';

  const genAI = new GoogleGenAI({ apiKey });
  const result = await genAI.models.generateContent({
    model: 'gemini-2.0-flash',
    contents: createUserContent([
      {
        inlineData: {
          mimeType,
          data: base64Audio,
        },
      },
      'Transcribí este audio porfa. te estaran hablando en español honduras.',
    ]),
  });

  // An empty or missing response text is reported as "no transcript".
  return result.text?.trim() || null;
}

View File

@@ -4,6 +4,7 @@ import { GoogleGenAI } from '@google/genai';
import { getHandler } from './chatHandlers';
import { addMessageToConversation } from './store/conversation';
import { WhatsAppMessage, Conversation } from './types';
import { transcribeAudioMessage } from './transcribeAudioMessage';
export interface WebhookConfig {
API_URL: string;
@@ -32,6 +33,12 @@ export function registerWebhookRoutes(
if (message) {
const origen = from || message.chatId || 'desconocido';
if(origen == '50493849962@c.us') //si el mensajes es de un agente, no lo proceses
{
return res.sendStatus(200);
}
console.log(`📩 Mensaje recibido (${message.text}) de ${origen}`);
}
@@ -41,6 +48,8 @@ export function registerWebhookRoutes(
const chatId = message.chatId || from;
// Audio message handling
// console.log(message);
if (
message.type === 'ptt' &&
message.mimetype === 'audio/ogg; codecs=opus'
@@ -53,33 +62,12 @@ export function registerWebhookRoutes(
}
console.log('🎤 Mensaje de audio detectado', audioUrl);
try {
// Download audio using the /downloadFileWithCredentials endpoint
const audioResponse = await axios.post(`${openWaUrl}/downloadFileWithCredentials`, {
args: { url: audioUrl },
});
const audioBase64 = audioResponse.data; // This is already a base64 string
const apiKey = process.env.GOOGLE_API_KEY;
if (!apiKey) {
throw new Error('GOOGLE_API_KEY is not set');
}
const genAI = new GoogleGenAI({ apiKey });
// Corrected Gemini API call structure
const result = await genAI.models.generateContent({
model: 'gemini-pro', // Ensure this model supports inline audio or use appropriate one
contents: [
{ inlineData: { mimeType: 'audio/ogg', data: audioBase64 } },
{ text: 'Generate a transcript of the speech.' },
],
});
// result directly is GenerateContentResponse
const transcript = result.text; // Use the getter for text
if (transcript === undefined) {
throw new Error('Transcription resulted in undefined text.');
}
const transcript = await transcribeAudioMessage(message);
console.log('📝 Transcripción:', transcript);
message.body = transcript;
message.body = transcript || '';
message.text = transcript || '';
} catch (transcriptionError: any) {
console.error('Error en la transcripción:', transcriptionError.message);
const reply =
@@ -90,6 +78,7 @@ export function registerWebhookRoutes(
}
}
console.log(message);
let conv: Conversation | undefined;
if (chatId) {
try {