Ya se pueden transcribir mensajes de audio y recibirlos en el agente
This commit is contained in:
11207
whatsapp-router/package-lock.json
generated
11207
whatsapp-router/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -10,12 +10,16 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@google/genai": "^1.4.0",
|
||||
"@open-wa/wa-automate": "^4.76.0",
|
||||
"axios": "^1.5.0",
|
||||
"dotenv": "^16.5.0",
|
||||
"express": "^4.18.2"
|
||||
"express": "^4.18.2",
|
||||
"ffmpeg-static": "^5.2.0",
|
||||
"fluent-ffmpeg": "^2.1.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/fluent-ffmpeg": "^2.1.27",
|
||||
"@types/node": "^20.11.19",
|
||||
"nodemon": "^3.1.10",
|
||||
"ts-node": "^10.9.2",
|
||||
|
||||
@@ -109,7 +109,7 @@ export async function buildConversation(
|
||||
title,
|
||||
isGroup,
|
||||
unreadCount,
|
||||
participants: Array.from(participantsMap.values()),
|
||||
participants: Array.from(participantsMap.values()),
|
||||
messages,
|
||||
createdAt: conversations.get(chatId)?.createdAt || now,
|
||||
};
|
||||
@@ -144,5 +144,6 @@ export async function addMessageToConversation(
|
||||
isMe: s.isMe,
|
||||
});
|
||||
}
|
||||
|
||||
return conv;
|
||||
}
|
||||
|
||||
55
whatsapp-router/src/transcribeAudioMessage.ts
Normal file
55
whatsapp-router/src/transcribeAudioMessage.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
// transcribeAudioMessage.ts
|
||||
import { WhatsAppMessage } from './types';
|
||||
import { decryptMedia } from '@open-wa/wa-automate';
|
||||
import axios from 'axios';
|
||||
import { GoogleGenAI, createUserContent } from '@google/genai';
|
||||
|
||||
/**
 * Reduces a message mimetype such as `audio/ogg; codecs=opus` to its bare
 * `type/subtype` form so it can be used to label the inline audio payload.
 *
 * Falls back to `audio/ogg` (the value this module previously hard-coded)
 * when the mimetype is missing or is not an `audio/*` type.
 */
function resolveAudioMimeType(mimetype: unknown): string {
  const fallback = 'audio/ogg';
  if (typeof mimetype !== 'string') return fallback;

  // Drop parameters like "; codecs=opus" and normalise case/whitespace.
  const bare = (mimetype.split(';', 1)[0] ?? '').trim().toLowerCase();
  return bare.startsWith('audio/') ? bare : fallback;
}

/**
 * Transcribes a WhatsApp audio message using Gemini.
 *
 * A message is treated as audio when its type is `ptt` or `audio`, or when
 * its mimetype is exactly `audio/ogg; codecs=opus`.
 *
 * @param message - Message received from OpenWA.
 * @returns The trimmed transcript, or `null` when the message is not audio or
 *   the model returned no text.
 * @throws Error when the message carries no media URL, or when the
 *   `GOOGLE_API_KEY` environment variable is not set. Download, decryption
 *   and Gemini failures propagate unchanged.
 */
export async function transcribeAudioMessage(message: WhatsAppMessage): Promise<string | null> {
  const looksLikeAudio =
    message.type === 'ptt' ||
    message.type === 'audio' ||
    message.mimetype === 'audio/ogg; codecs=opus';
  if (!looksLikeAudio) return null;

  const audioUrl = message.clientUrl || message.deprecatedMms3Url;
  if (!audioUrl) throw new Error('El mensaje no tiene URL de audio');

  // Fail fast: validate configuration before doing any network or CPU work.
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) throw new Error('Falta GOOGLE_API_KEY');

  const raw = await axios.get(audioUrl, { responseType: 'arraybuffer' });

  // The downloaded bytes are attached under `_data._raw` before decrypting.
  // NOTE(review): confirm decryptMedia actually reads `_data._raw`; if it
  // downloads the media itself, the axios request above is redundant.
  const enrichedMessage = {
    ...message,
    _data: {
      ...message,
      _raw: raw.data,
    },
  };

  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- enriched shape is not part of the library's Message type
  const decryptedBuffer = await decryptMedia(enrichedMessage as any);
  const base64Audio = decryptedBuffer.toString('base64');

  const genAI = new GoogleGenAI({ apiKey });
  const result = await genAI.models.generateContent({
    model: 'gemini-2.0-flash',
    contents: createUserContent([
      {
        inlineData: {
          // Label the payload with the message's real audio type instead of
          // always claiming Ogg; `audio` messages are accepted with any mimetype.
          mimeType: resolveAudioMimeType(message.mimetype),
          data: base64Audio,
        },
      },
      'Transcribí este audio porfa. te estaran hablando en español honduras.',
    ]),
  });

  // An empty or whitespace-only transcript is reported as null.
  return result.text?.trim() || null;
}
|
||||
@@ -4,6 +4,7 @@ import { GoogleGenAI } from '@google/genai';
|
||||
import { getHandler } from './chatHandlers';
|
||||
import { addMessageToConversation } from './store/conversation';
|
||||
import { WhatsAppMessage, Conversation } from './types';
|
||||
import { transcribeAudioMessage } from './transcribeAudioMessage';
|
||||
|
||||
export interface WebhookConfig {
|
||||
API_URL: string;
|
||||
@@ -32,6 +33,12 @@ export function registerWebhookRoutes(
|
||||
|
||||
if (message) {
|
||||
const origen = from || message.chatId || 'desconocido';
|
||||
|
||||
if(origen == '50493849962@c.us') //si el mensajes es de un agente, no lo proceses
|
||||
{
|
||||
return res.sendStatus(200);
|
||||
}
|
||||
|
||||
console.log(`📩 Mensaje recibido (${message.text}) de ${origen}`);
|
||||
}
|
||||
|
||||
@@ -41,6 +48,8 @@ export function registerWebhookRoutes(
|
||||
const chatId = message.chatId || from;
|
||||
|
||||
// Audio message handling
|
||||
// console.log(message);
|
||||
|
||||
if (
|
||||
message.type === 'ptt' &&
|
||||
message.mimetype === 'audio/ogg; codecs=opus'
|
||||
@@ -53,33 +62,12 @@ export function registerWebhookRoutes(
|
||||
}
|
||||
console.log('🎤 Mensaje de audio detectado', audioUrl);
|
||||
try {
|
||||
// Download audio using the /downloadFileWithCredentials endpoint
|
||||
const audioResponse = await axios.post(`${openWaUrl}/downloadFileWithCredentials`, {
|
||||
args: { url: audioUrl },
|
||||
});
|
||||
const audioBase64 = audioResponse.data; // This is already a base64 string
|
||||
|
||||
const apiKey = process.env.GOOGLE_API_KEY;
|
||||
if (!apiKey) {
|
||||
throw new Error('GOOGLE_API_KEY is not set');
|
||||
}
|
||||
const genAI = new GoogleGenAI({ apiKey });
|
||||
|
||||
// Corrected Gemini API call structure
|
||||
const result = await genAI.models.generateContent({
|
||||
model: 'gemini-pro', // Ensure this model supports inline audio or use appropriate one
|
||||
contents: [
|
||||
{ inlineData: { mimeType: 'audio/ogg', data: audioBase64 } },
|
||||
{ text: 'Generate a transcript of the speech.' },
|
||||
],
|
||||
});
|
||||
// result directly is GenerateContentResponse
|
||||
const transcript = result.text; // Use the getter for text
|
||||
if (transcript === undefined) {
|
||||
throw new Error('Transcription resulted in undefined text.');
|
||||
}
|
||||
const transcript = await transcribeAudioMessage(message);
|
||||
console.log('📝 Transcripción:', transcript);
|
||||
message.body = transcript;
|
||||
message.body = transcript || '';
|
||||
message.text = transcript || '';
|
||||
|
||||
|
||||
} catch (transcriptionError: any) {
|
||||
console.error('Error en la transcripción:', transcriptionError.message);
|
||||
const reply =
|
||||
@@ -90,6 +78,7 @@ export function registerWebhookRoutes(
|
||||
}
|
||||
}
|
||||
|
||||
console.log(message);
|
||||
let conv: Conversation | undefined;
|
||||
if (chatId) {
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user