56 lines
1.6 KiB
TypeScript
56 lines
1.6 KiB
TypeScript
// transcribeAudioMessage.ts
|
|
import { WhatsAppMessage } from './types';
|
|
import { decryptMedia } from '@open-wa/wa-automate';
|
|
import axios from 'axios';
|
|
import { GoogleGenAI, createUserContent } from '@google/genai';
|
|
|
|
/**
 * Transcribes a WhatsApp audio message using Gemini.
 *
 * A message is treated as audio when its type is `ptt` or `audio`, or when its
 * MIME type is exactly `audio/ogg; codecs=opus`.
 *
 * @param message - Message received from OpenWA.
 * @returns The trimmed transcription, or `null` when the message is not audio
 *   or Gemini returns no text.
 * @throws Error when the message has no media URL or when `GOOGLE_API_KEY` is
 *   not set. Errors raised by the download, the decryption or the Gemini
 *   request are not caught here and propagate to the caller.
 */
export async function transcribeAudioMessage(message: WhatsAppMessage): Promise<string | null> {
  const isAudio =
    message.type === 'ptt' ||
    message.type === 'audio' ||
    message.mimetype === 'audio/ogg; codecs=opus';
  if (!isAudio) {
    return null;
  }

  const audioUrl = message.clientUrl || message.deprecatedMms3Url;
  if (!audioUrl) throw new Error('El mensaje no tiene URL de audio');

  // Check configuration before any network work so a missing key does not
  // cost a download and a decryption first.
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) throw new Error('Falta GOOGLE_API_KEY');

  // NOTE(review): decryptMedia may download the media from the message URL on
  // its own; confirm whether this prefetch and the `_data._raw` field are
  // actually required by the installed @open-wa/wa-automate version.
  const raw = await axios.get(audioUrl, { responseType: 'arraybuffer' });

  const enrichedMessage = {
    ...message,
    _data: {
      ...message,
      _raw: raw.data
    }
  };

  // The enriched object is not typed as the message type decryptMedia
  // declares, hence the cast.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const decryptedBuffer = await decryptMedia(enrichedMessage as any);
  const base64Audio = decryptedBuffer.toString('base64');

  // Label the payload with the message's real audio format instead of always
  // claiming Ogg: `audio` messages may carry other formats. Parameters such as
  // "; codecs=opus" are stripped, and anything missing or not `audio/*` falls
  // back to the previous hard-coded value.
  const declaredMimeType = message.mimetype?.split(';')[0]?.trim().toLowerCase();
  const mimeType =
    declaredMimeType && declaredMimeType.startsWith('audio/') ? declaredMimeType : 'audio/ogg';

  const genAI = new GoogleGenAI({ apiKey });
  const result = await genAI.models.generateContent({
    model: 'gemini-2.0-flash',
    contents: createUserContent([
      {
        inlineData: {
          mimeType,
          data: base64Audio
        }
      },
      'Transcribí este audio porfa. te estaran hablando en español honduras.'
    ])
  });

  // `||` is intentional: an empty transcription is reported as null.
  return result.text?.trim() || null;
}
|