/**
 * Script to scrape Baileys message history documentation from baileys.wiki
 * Focused on fetching messages and chat history functionality
 */

import * as fs from 'fs'

/** Root of the API reference; every `DocSection.path` is appended to it. */
const BASE_URL = 'https://baileys.wiki/docs/api'

/** Directory and file the generated markdown reference is written to. */
const OUTPUT_DIR = './docs'
const OUTPUT_PATH = `${OUTPUT_DIR}/baileys-message-history-reference.md`

/** Pause between consecutive page requests, in milliseconds. */
const REQUEST_DELAY_MS = 300

/** One documentation page to scrape. */
interface DocSection {
  /** Symbol name as shown in the generated reference. */
  name: string
  /** Path of the page relative to `BASE_URL` (starts with `/`). */
  path: string
  /** Kind of symbol; used (capitalised) as the heading prefix. */
  type: 'interface' | 'type' | 'function' | 'variable' | 'class' | 'enum'
  /** Group under which the section is listed in the output. */
  category: string
}

// Sections specifically related to message history
const SECTIONS: DocSection[] = [
  // Core message types
  { name: 'WAMessage', path: '/type-aliases/WAMessage', type: 'type', category: 'Message Types' },
  { name: 'WAMessageKey', path: '/type-aliases/WAMessageKey', type: 'type', category: 'Message Types' },
  { name: 'WAMessageCursor', path: '/type-aliases/WAMessageCursor', type: 'type', category: 'Message Types' },
  { name: 'WAMessageUpdate', path: '/type-aliases/WAMessageUpdate', type: 'type', category: 'Message Types' },
  { name: 'MinimalMessage', path: '/type-aliases/MinimalMessage', type: 'type', category: 'Message Types' },
  { name: 'RecentMessage', path: '/interfaces/RecentMessage', type: 'interface', category: 'Message Types' },
  { name: 'RecentMessageKey', path: '/interfaces/RecentMessageKey', type: 'interface', category: 'Message Types' },
  { name: 'LastMessageList', path: '/type-aliases/LastMessageList', type: 'type', category: 'Message Types' },

  // Chat types (messages are in chats)
  { name: 'Chat', path: '/type-aliases/Chat', type: 'type', category: 'Chat Types' },
  { name: 'ChatModification', path: '/type-aliases/ChatModification', type: 'type', category: 'Chat Types' },
  { name: 'ChatMutation', path: '/type-aliases/ChatMutation', type: 'type', category: 'Chat Types' },

  // History sync functions
  { name: 'downloadAndProcessHistorySyncNotification', path: '/functions/downloadAndProcessHistorySyncNotification', type: 'function', category: 'History Functions' },
  { name: 'processHistoryMessage', path: '/functions/processHistoryMessage', type: 'function', category: 'History Functions' },
  { name: 'getHistoryMsg', path: '/functions/getHistoryMsg', type: 'function', category: 'History Functions' },
  { name: 'downloadHistory', path: '/functions/downloadHistory', type: 'function', category: 'History Functions' },

  // Socket configuration for history
  { name: 'WASocket', path: '/type-aliases/WASocket', type: 'type', category: 'Socket' },
  { name: 'SocketConfig', path: '/type-aliases/SocketConfig', type: 'type', category: 'Socket' },
  { name: 'UserFacingSocketConfig', path: '/type-aliases/UserFacingSocketConfig', type: 'type', category: 'Socket' },

  // Events related to messages/history
  { name: 'BaileysEventMap', path: '/type-aliases/BaileysEventMap', type: 'type', category: 'Events' },
  { name: 'BaileysEventEmitter', path: '/interfaces/BaileysEventEmitter', type: 'interface', category: 'Events' },
  { name: 'MessageUpsertType', path: '/type-aliases/MessageUpsertType', type: 'type', category: 'Events' },

  // Message content extraction
  { name: 'extractMessageContent', path: '/functions/extractMessageContent', type: 'function', category: 'Message Utils' },
  { name: 'getContentType', path: '/functions/getContentType', type: 'function', category: 'Message Utils' },
  { name: 'normalizeMessageContent', path: '/functions/normalizeMessageContent', type: 'function', category: 'Message Utils' },

  // Variables related to history
  { name: 'PROCESSABLE_HISTORY_TYPES', path: '/variables/PROCESSABLE_HISTORY_TYPES', type: 'variable', category: 'Constants' },
]

/**
 * Downloads a page and returns its body as text.
 *
 * @param url Absolute URL to request.
 * @returns The response body.
 * @throws Error when the response status is not in the 2xx range; network
 *   failures raised by `fetch` propagate unchanged.
 */
async function fetchPage(url: string): Promise<string> {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status}`)
  }
  return response.text()
}

/**
 * Elements removed together with their content before anything else runs.
 * The `\b` after each tag name stops a short name from matching a longer
 * one that merely starts with it.
 */
const STRIPPED_ELEMENTS: readonly RegExp[] = [
  /<script\b[^>]*>[\s\S]*?<\/script>/gi,
  /<style\b[^>]*>[\s\S]*?<\/style>/gi,
  /<nav\b[^>]*>[\s\S]*?<\/nav>/gi,
  /<footer\b[^>]*>[\s\S]*?<\/footer>/gi,
  /<header\b[^>]*>[\s\S]*?<\/header>/gi,
  /<aside\b[^>]*>[\s\S]*?<\/aside>/gi,
]

/**
 * Ordered HTML -> markdown-like rewrite rules. Order matters:
 * - `<pre><code>` must be handled before bare `<code>`;
 * - the catch-all tag remover must run after every tag-specific rule;
 * - entities are decoded only after tags are gone, so an escaped `&lt;p&gt;`
 *   in the page is not mistaken for markup;
 * - `&amp;` is decoded last, so `&amp;quot;` yields the literal text `&quot;`
 *   instead of being decoded twice.
 */
const MARKDOWN_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  // Headers
  [/<h1\b[^>]*>([\s\S]*?)<\/h1>/gi, '\n# $1\n'],
  [/<h2\b[^>]*>([\s\S]*?)<\/h2>/gi, '\n## $1\n'],
  [/<h3\b[^>]*>([\s\S]*?)<\/h3>/gi, '\n### $1\n'],
  [/<h4\b[^>]*>([\s\S]*?)<\/h4>/gi, '\n#### $1\n'],
  // Code blocks
  [/<pre\b[^>]*><code\b[^>]*>([\s\S]*?)<\/code><\/pre>/gi, '\n```typescript\n$1\n```\n'],
  [/<code\b[^>]*>([\s\S]*?)<\/code>/gi, '`$1`'],
  // Lists
  [/<li\b[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n'],
  [/<ul\b[^>]*>/gi, '\n'],
  [/<\/ul>/gi, '\n'],
  // Paragraphs
  [/<p\b[^>]*>([\s\S]*?)<\/p>/gi, '\n$1\n'],
  // Links - keep only text for cleaner output
  [/<a\b[^>]*>([\s\S]*?)<\/a>/gi, '$1'],
  // Bold/Italic
  [/<strong\b[^>]*>([\s\S]*?)<\/strong>/gi, '**$1**'],
  [/<em\b[^>]*>([\s\S]*?)<\/em>/gi, '*$1*'],
  // Line breaks
  [/<br\s*\/?>/gi, '\n'],
  // Divs and spans
  [/<div\b[^>]*>/gi, '\n'],
  [/<\/div>/gi, '\n'],
  [/<span\b[^>]*>/gi, ''],
  [/<\/span>/gi, ''],
  // Tables (simplified)
  [/<table\b[^>]*>/gi, '\n'],
  [/<\/table>/gi, '\n'],
  [/<tr\b[^>]*>/gi, ''],
  [/<\/tr>/gi, '\n'],
  [/<td\b[^>]*>([\s\S]*?)<\/td>/gi, '| $1 '],
  [/<th\b[^>]*>([\s\S]*?)<\/th>/gi, '| **$1** '],
  // Remove remaining tags
  [/<[^>]+>/g, ''],
  // Decode HTML entities (`&amp;` deliberately last, see above)
  [/&lt;/g, '<'],
  [/&gt;/g, '>'],
  [/&quot;/g, '"'],
  [/&(?:#39|#x27|apos);/gi, "'"],
  [/&nbsp;/g, ' '],
  [/&amp;/g, '&'],
  // Collapse runs of blank lines into a single blank line
  [/\n\s*\n\s*\n/g, '\n\n'],
]

/**
 * Converts a documentation page to markdown-like plain text.
 *
 * Page chrome (script, style, nav, footer, header, aside) is dropped, then the
 * contents of the first `<main>` (or, failing that, the first `<article>`) are
 * converted; when neither exists the whole remaining document is converted.
 *
 * This is a regex-based best-effort conversion, not an HTML parser: nested or
 * malformed markup may produce imperfect output.
 *
 * @param html Raw HTML of the page.
 * @returns Trimmed markdown-like text.
 */
function extractContent(html: string): string {
  let cleaned = html
  for (const pattern of STRIPPED_ELEMENTS) {
    cleaned = cleaned.replace(pattern, '')
  }

  // Extract main/article content
  const mainMatch =
    cleaned.match(/<main\b[^>]*>([\s\S]*?)<\/main>/i) ??
    cleaned.match(/<article\b[^>]*>([\s\S]*?)<\/article>/i)
  let text = mainMatch?.[1] ?? cleaned

  // Convert HTML to markdown-like text
  for (const [pattern, replacement] of MARKDOWN_RULES) {
    text = text.replace(pattern, replacement)
  }
  return text.trim()
}

/** Builds the separator, heading and source line that open every section. */
function sectionHeading(section: DocSection, url: string): string {
  const kind = section.type.charAt(0).toUpperCase() + section.type.slice(1)
  return `
---

## ${kind}: ${section.name}

**Source:** ${url}
`
}

/**
 * Fetches one documentation page and renders it as a markdown section.
 *
 * Never rejects: when the page cannot be fetched the error is logged and a
 * placeholder section is returned, so one broken page does not abort the run.
 *
 * @param section Page to scrape.
 * @returns Markdown for the section (content or an error placeholder).
 */
async function scrapeSection(section: DocSection): Promise<string> {
  const url = `${BASE_URL}${section.path}`
  console.log(`Fetching ${section.name}...`)

  const heading = sectionHeading(section, url)
  try {
    const html = await fetchPage(url)
    return `${heading}
${extractContent(html)}
`
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error)
    console.error(`Error fetching ${section.name}:`, reason)
    return `${heading}
*Error: Could not fetch documentation*
`
  }
}

/**
 * Builds the static preamble of the generated document: title, table of
 * contents grouped by category, and a hand-written quick reference.
 */
function buildDocumentHeader(categories: readonly string[]): string {
  const tableOfContents = categories
    .map(cat => {
      const items = SECTIONS.filter(s => s.category === cat)
      // Anchor mirrors the "## <Type>: <Name>" heading emitted per section.
      const links = items.map(s => `- [${s.name}](#${s.type}-${s.name.toLowerCase()})`)
      return `### ${cat}\n${links.join('\n')}`
    })
    .join('\n\n')

  return `# Baileys Message History API Reference

> Documentation for fetching and managing message history in WhatsApp Nucleo
> Source: https://baileys.wiki
> Generated: ${new Date().toISOString()}

This document contains the Baileys API documentation specifically for:
- Fetching message history from chats
- History sync functionality
- Message events and types

## Table of Contents

${tableOfContents}

## Quick Reference: How to Fetch Messages

### Using fetchMessageHistory (WASocket method)

\`\`\`typescript
// The WASocket has a fetchMessageHistory method:
sock.fetchMessageHistory(
  count: number,              // Number of messages to fetch
  oldestMsgKey: WAMessageKey, // Key of the oldest message you have
  oldestMsgTimestamp: number  // Timestamp of the oldest message
)
\`\`\`

### Listening to History Sync Events

\`\`\`typescript
sock.ev.on('messaging-history.set', ({ chats, contacts, messages, isLatest }) => {
  // messages: WAMessage[] - reverse chronologically sorted
  // chats: Chat[] - chat metadata
  // isLatest: boolean - if this is the most recent sync
})
\`\`\`

### Configuration Options

\`\`\`typescript
const sock = makeWASocket({
  // ... other config
  syncFullHistory: true,  // Request full history from phone
  shouldSyncHistoryMessage: (msg) => true,  // Control which messages to sync
  getMessage: async (key) => {
    // Implement to fetch message from your store
    // Required for message retries
  }
})
\`\`\`
`
}

/**
 * Scrapes every configured section, category by category, and writes the
 * combined markdown document to `OUTPUT_PATH`.
 */
async function main(): Promise<void> {
  console.log('Starting Baileys message history documentation scrape...\n')

  // Group sections by category (first-seen order is preserved by Set)
  const categories = [...new Set(SECTIONS.map(s => s.category))]
  const markdown: string[] = [buildDocumentHeader(categories)]

  // Process each category. Requests are intentionally sequential.
  for (const category of categories) {
    markdown.push(`\n# ${category}\n`)

    const sections = SECTIONS.filter(s => s.category === category)
    for (const section of sections) {
      markdown.push(await scrapeSection(section))

      // Small delay to be nice to the server
      await new Promise<void>(resolve => setTimeout(resolve, REQUEST_DELAY_MS))
    }
  }

  // Write to file; create the target directory first so a fresh checkout works.
  fs.mkdirSync(OUTPUT_DIR, { recursive: true })
  fs.writeFileSync(OUTPUT_PATH, markdown.join('\n'))

  console.log(`\nDocumentation saved to ${OUTPUT_PATH}`)
  console.log(`Total sections: ${SECTIONS.length}`)
}

main().catch((error: unknown) => {
  console.error(error)
  // Signal failure to the caller (CI, npm script) instead of exiting 0.
  process.exitCode = 1
})