diff --git a/ProPublica/AI-messageGen.js b/ProPublica/AI-messageGen.js index 69c04d5..e92c0c2 100644 --- a/ProPublica/AI-messageGen.js +++ b/ProPublica/AI-messageGen.js @@ -5,7 +5,7 @@ import { config } from '../utils/config.js'; import { url_filtering } from './url_filtering.js'; import { sendNewsMessage } from '../sender.js'; -const ai = new GoogleGenAI({ apiKey: "AIzaSyBu5IPEdCfhIS4zPXqcC3qi82UM0IKDbeA" }); +const ai = new GoogleGenAI({ apiKey: "AIzaSyBu5IPEdCfhIS4zPXqcC3qi82UM0IKDbeA" }); //keep this api key; is free // Temporarily store the original logStats function const originalLogStats = globalStats.logStats; diff --git a/ProPublica/GetArticleinfo.js b/ProPublica/GetArticleinfo.js index d0c76af..cc0afde 100644 --- a/ProPublica/GetArticleinfo.js +++ b/ProPublica/GetArticleinfo.js @@ -45,100 +45,118 @@ export async function parseArticle(url) { } } - // Get hero image using the ImageHandler utility - const imageHandler = new ImageHandler('ProPublica', 'https://www.propublica.org', $); + // Get hero image - prioritize main ProPublica domain images let heroImages = []; - const imageSelectors = [ - // Primary ProPublica image selectors with improved specificity - '.lead-art img[width][height]', - 'figure.lead-art img[src]', - '.hero-image img[src]', - 'article figure img[src*="feature"][width]', - '.article-header img[src*="header"]', - '.story-header img[src*="story"]', - 'img.feature-image[src*="propublica"]', - // Secondary selectors with quality filters - 'article .article-body figure:first-of-type img[width]', - '.story-body figure:first-of-type img[width]', - // Fallback selectors with constraints - '.lead-art img[src*="propublica"]', - '.article-header img[src*="propublica"]' - ]; - - console.log('\nSearching for article images...'); + console.log('\nSearching for ProPublica article images...'); - for (const selector of imageSelectors) { - const imgs = $(selector); - console.log(`Trying selector "${selector}": found ${imgs.length} images`); + // First, 
look specifically for main domain images (img.assets-d.propublica.org) + const mainDomainImages = []; + $('img').each((_, img) => { + const $img = $(img); + const src = $img.attr('src') || $img.attr('data-src'); - const selectorImages = []; - imgs.each((_, img) => { - const $img = $(img); + if (src && src.includes('img.assets-d.propublica.org')) { + const width = parseInt($img.attr('width') || '0'); + const height = parseInt($img.attr('height') || '0'); - if (!imageHandler.isArticleImage($img)) { - console.log('Skipping non-article image:', $img.attr('src')); - return; - } + // Prioritize larger images (likely the main hero image) + mainDomainImages.push({ + url: src, + width: width, + height: height, + size: width * height + }); - const srcs = imageHandler.getImageSources($img); - selectorImages.push(...srcs); - }); + console.log(`Found main domain image: ${src} (${width}x${height})`); + } + }); + + // Sort by size (largest first) and take the largest one + if (mainDomainImages.length > 0) { + mainDomainImages.sort((a, b) => b.size - a.size); + const bestImage = mainDomainImages[0]; + console.log(`Selected largest image: ${bestImage.url} (${bestImage.width}x${bestImage.height})`); - if (selectorImages.length > 0) { - console.log(`Found ${selectorImages.length} potential images with selector "${selector}"`); - const validatedImages = await imageHandler.filterAndValidateImages(selectorImages); - if (validatedImages.length > 0) { - heroImages = validatedImages; - break; + // Validate the selected image + try { + const response = await fetch(bestImage.url, { method: 'HEAD' }); + if (response.ok) { + const contentType = response.headers.get('content-type'); + if (contentType && contentType.startsWith('image/') && !contentType.includes('svg')) { + heroImages = [bestImage.url]; + console.log('✓ Main domain image validated successfully'); + } } + } catch (error) { + console.log(`Failed to validate main domain image: ${error.message}`); } } - - // Fallback to meta tags if 
no hero image is found + + // If no main domain image found, use fallback selectors if (heroImages.length === 0) { - console.log('No hero image found with selectors, checking meta tags...'); - const metaImage = $('meta[property="og:image"]').attr('content') || $('meta[name="twitter:image"]').attr('content'); - if (metaImage) { - console.log('Found meta image:', metaImage); - const validatedMetaImage = await imageHandler.filterAndValidateImages([metaImage]); - if (validatedMetaImage.length > 0) { - heroImages = validatedMetaImage; + console.log('No main domain image found, using fallback selectors...'); + + const imageHandler = new ImageHandler('ProPublica', 'https://www.propublica.org', $); + const imageSelectors = [ + // Look for large images in article sections + '.lead-art img[width][height]', + 'figure.lead-art img[src]', + '.hero-image img[src]', + 'article figure img[width]', + '.article-header img', + '.story-header img', + // Broader selectors as last resort + 'article img[width]' + ]; + + for (const selector of imageSelectors) { + const imgs = $(selector); + console.log(`Trying fallback selector "${selector}": found ${imgs.length} images`); + + const selectorImages = []; + imgs.each((_, img) => { + const $img = $(img); + + if (!imageHandler.isArticleImage($img)) { + console.log('Skipping non-article image:', $img.attr('src')); + return; + } + + const srcs = imageHandler.getImageSources($img); + selectorImages.push(...srcs); + }); + + if (selectorImages.length > 0) { + console.log(`Found ${selectorImages.length} potential images with selector "${selector}"`); + const validatedImages = await imageHandler.filterAndValidateImages(selectorImages); + if (validatedImages.length > 0) { + heroImages = validatedImages; + break; + } } - } else { - console.log('No meta image found.'); } - } - - // Validate images - if (heroImages.length > 0) { - console.log('\nValidating images...'); - for (let i = 0; i < heroImages.length; i++) { - const imgUrl = heroImages[i]; - try 
{ - // Check if image is accessible - const response = await fetch(imgUrl, { method: 'HEAD' }); - if (!response.ok) { - console.log(`Image ${imgUrl} is not accessible, status: ${response.status}`); - heroImages.splice(i, 1); - i--; - continue; - } - - // Check content type - const contentType = response.headers.get('content-type'); - if (!contentType || !contentType.startsWith('image/') || contentType.includes('svg')) { - console.log(`Invalid image type for ${imgUrl}: ${contentType}`); - heroImages.splice(i, 1); - i--; - continue; + + // Final fallback to meta tags if no hero image is found + if (heroImages.length === 0) { + console.log('No hero image found with selectors, checking meta tags...'); + const metaImage = $('meta[property="og:image"]').attr('content') || $('meta[name="twitter:image"]').attr('content'); + if (metaImage) { + console.log('Found meta image:', metaImage); + try { + const response = await fetch(metaImage, { method: 'HEAD' }); + if (response.ok) { + const contentType = response.headers.get('content-type'); + if (contentType && contentType.startsWith('image/') && !contentType.includes('svg')) { + heroImages = [metaImage]; + console.log('✓ Meta image validated successfully'); + } + } + } catch (error) { + console.log(`Failed to validate meta image: ${error.message}`); } - - console.log(`✓ Validated image: ${imgUrl}`); - } catch (err) { - console.log(`Failed to validate image ${imgUrl}:`, err.message); - heroImages.splice(i, 1); - i--; + } else { + console.log('No meta image found.'); } } } diff --git a/ProPublica/main.js b/ProPublica/main.js index f52e376..66b3565 100644 --- a/ProPublica/main.js +++ b/ProPublica/main.js @@ -20,12 +20,12 @@ async function sleep(ms) { } function showFetchingAnimation() { - const frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; + const frames = ['.', '..', '...', '....']; let i = 0; const interval = setInterval(() => { - process.stdout.write(`\r${frames[i]} Fetching new ProPublica articles...`); + 
process.stdout.write(`\rFetching new ProPublica articles${frames[i]}`); i = (i + 1) % frames.length; - }, 80); + }, 500); return interval; } @@ -198,7 +198,7 @@ async function checkForNewArticles() { } } -async function main() { +export async function main() { console.log('Starting ProPublica monitoring...'); // Load previous URLs from file @@ -239,8 +239,14 @@ async function main() { } } -main().catch(error => { - logError(`Fatal error in ProPublica main script: ${error.message}`); - console.error('Fatal error:', error); - process.exit(1); -}); \ No newline at end of file +// Export the functions for external use +export { processLatestArticle, checkForNewArticles, loadPreviousUrls, savePreviousUrls }; + +// Only run the main function if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + main().catch(error => { + logError(`Fatal error in ProPublica main script: ${error.message}`); + console.error('Fatal error:', error); + process.exit(1); + }); +} \ No newline at end of file diff --git a/README.md b/README.md index 3a9349c..b2c18aa 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,188 @@ -# News Aggregation Service +# Enhanced Telegram News Aggregation Bot -This service monitors ConsortiumNews, ProPublica, and Truthout for new articles, generates AI summaries, and posts them to a Telegram channel. +This service monitors ConsortiumNews, ProPublica, and Truthout for new articles, generates AI summaries, and posts them to a Telegram channel. Now includes enhanced bot commands for monitoring and control. + +> 🚀 **New User?** Check out the [Quick Start Guide](QUICK_START.md) for a 5-minute setup! 
+ +## 🚀 Features + +- **Automated News Monitoring**: Continuously monitors multiple news sources +- **AI-Powered Summaries**: Generates concise summaries using Google's Gemini AI +- **Enhanced Bot Commands**: Full control and monitoring via Telegram commands +- **Comprehensive Logging**: Detailed activity, error, and performance logs +- **Remote Management**: Start, stop, and restart processes remotely + +## 🤖 Bot Commands + +### Core Commands +- `/log <type>` - Download various log files as .txt documents +- `/status` - Show bot and process status with uptime and memory usage +- `/rerun` - Restart the entire news aggregation process +- `/stop` - Completely shut down the bot and all processes +- `/help` - Show all available commands + +### Log Types Available +- `activity` - Full activity log (debug.log) +- `errors` - Error messages (errors.log) +- `articles` - Published articles log (new_articles.log) +- `fetch` - Article fetch monitoring +- `parse` - Article parsing monitoring +- `ai` - AI processing monitoring +- `telegram` - Telegram message delivery monitoring +- `memory` - Memory usage monitoring +- `performance` - Performance metrics + +### Usage Examples +``` +/log activity # Download full activity log +/log errors # Download error log +/log articles # Download published articles log +/status # Check system status +/rerun # Restart if something is stuck +/stop # Emergency shutdown +``` ## Setup -1. Copy the environment template: +1. Configure your environment: ```bash -cp .env.template .env +# Required environment variables in .env +TELEGRAM_BOT_TOKEN=your_bot_token_from_botfather +TELEGRAM_CHAT_ID=your_chat_or_channel_id +GOOGLE_API_KEY=your_google_ai_api_key ``` -2. Edit the `.env` file and add your credentials: -- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token from @BotFather -- `TELEGRAM_CHAT_ID`: The ID of your Telegram channel/group -- `GOOGLE_API_KEY`: Your Google AI API key - -3. Install dependencies: +2. Install dependencies: ```bash npm install ``` -4. 
Start the service: +3. Start the enhanced bot with news aggregation: +```bash +npm start # Full service with bot commands (recommended) +``` + +Or choose a specific mode: +```bash +npm run bot # Bot commands only (no automatic news checking) +npm run original # Original main.js behavior +npm run test # Test Telegram connectivity +``` + +## 🔧 Running Modes + +### Full Service Mode (Recommended) +```bash +npm start +``` +- Starts enhanced bot with all commands +- Runs news aggregation every 30 minutes +- Full monitoring and logging +- Remote control via Telegram commands + +### Bot-Only Mode +```bash +npm run bot +``` +- Only starts bot for manual control +- Use `/rerun` command to trigger news aggregation +- Perfect for testing or manual operation + +### Original Mode ```bash -node main.js +npm run original ``` +- Runs original main.js once and exits +- No bot commands or continuous monitoring + +## 📊 Monitoring & Logs + +The enhanced system provides comprehensive logging: + +### Standard Logs (`logs/` directory) +- `debug.log` - Full activity log +- `errors.log` - Error messages only +- `new_articles.log` - Published articles +- `terminal.log` - General application logs + +### Enhanced Monitoring (`logs/monitor/` directory) +- `fetch_monitor.log` - Article fetching attempts +- `parse_monitor.log` - HTML parsing results +- `ai_monitor.log` - AI processing status +- `telegram_monitor.log` - Message delivery status +- `memory_monitor.log` - Memory usage tracking +- `performance_monitor.log` - Performance metrics + +### Access Logs via Bot +Use the `/log <type>` command to download any log file instantly to your Telegram chat. 
-## Security Notes +## 🔒 Security Notes - Never commit the `.env` file to version control - Keep your API keys secret and secure - Regularly rotate your API keys - Monitor your API usage to prevent unauthorized access +- Bot commands are restricted to the configured TELEGRAM_CHAT_ID only -## Development +## 🛠️ Development The project uses environment variables for all sensitive configuration. When developing: 1. Always use `config.js` to access environment variables 2. Never hardcode API keys or tokens 3. Use the `.gitignore` file to prevent committing sensitive files +4. Test with `npm run test` before deployment + +## 📁 Project Structure + +``` +├── enhanced_main.js # Enhanced main script with bot integration +├── bot_controller.js # Telegram bot command handler +├── bot_launcher.js # Standalone bot launcher +├── main.js # Original main script +├── sender.js # Telegram message sender +├── ProPublica/ # ProPublica news source +├── consortiumnews/ # ConsortiumNews source +├── utils/ # Utility modules +│ ├── config.js # Environment configuration +│ ├── logger.js # Logging utilities +│ ├── monitoring.js # Enhanced monitoring +│ └── ... +└── logs/ # Log files + └── monitor/ # Enhanced monitoring logs +``` + +## 🔄 Process Management -## Logging +The enhanced bot supports full process lifecycle management: -Logs are stored in the `logs/` directory: -- `terminal.log`: General application logs -- `errors.log`: Error messages -- `new_articles.log`: New articles found +- **Start**: Automatically starts news aggregation +- **Monitor**: Real-time status via `/status` command +- **Restart**: Use `/rerun` to restart stuck processes +- **Stop**: Graceful shutdown with `/stop` command +- **Logs**: Instant access to all logs via `/log` commands + +## 🆘 Troubleshooting + +### Bot Not Responding +1. Check bot token in `.env` file +2. Verify chat ID is correct +3. Use `/status` to check if process is running +4. 
Try `/rerun` to restart + +### Missing Logs +```bash +/log help # See available log types +/log activity # Download main activity log +``` + +### Process Issues +```bash +/status # Check process status +/rerun # Restart news aggregation +``` + +### Emergency Stop +```bash +/stop # Complete shutdown +``` diff --git a/bot_controller.js b/bot_controller.js new file mode 100644 index 0000000..887934b --- /dev/null +++ b/bot_controller.js @@ -0,0 +1,397 @@ +import TelegramBot from 'node-telegram-bot-api'; +import { config } from './utils/config.js'; +import { monitor } from './utils/monitoring.js'; +import fs from 'fs'; +import path from 'path'; +import { spawn } from 'child_process'; + +class BotController { + constructor() { + this.bot = new TelegramBot(config.telegram.botToken, { polling: true }); + this.mainProcess = null; + this.setupCommands(); + console.log('Telegram Bot started with enhanced commands'); + } + + setupCommands() { + // /log command - Download various log files + this.bot.onText(/\/log( (.+))?/, async (msg, match) => { + const chatId = msg.chat.id; + const logType = match[2] || 'help'; + + // Only allow admin chat + if (chatId.toString() !== config.telegram.chatId) { + await this.bot.sendMessage(chatId, 'Unauthorized access'); + return; + } + + try { + await this.handleLogCommand(chatId, logType); + } catch (error) { + console.error('Error handling log command:', error); + await this.bot.sendMessage(chatId, `Error retrieving logs: ${error.message}`); + } + }); + + // /stop command - Completely shut down the bot + this.bot.onText(/\/stop/, async (msg) => { + const chatId = msg.chat.id; + + // Only allow admin chat + if (chatId.toString() !== config.telegram.chatId) { + await this.bot.sendMessage(chatId, 'Unauthorized access'); + return; + } + + try { + await this.handleStopCommand(chatId); + } catch (error) { + console.error('Error handling stop command:', error); + } + }); + + // /rerun command - Restart the entire process + 
this.bot.onText(/\/rerun/, async (msg) => { + const chatId = msg.chat.id; + + // Only allow admin chat + if (chatId.toString() !== config.telegram.chatId) { + await this.bot.sendMessage(chatId, 'Unauthorized access'); + return; + } + + try { + await this.handleRerunCommand(chatId); + } catch (error) { + console.error('Error handling rerun command:', error); + await this.bot.sendMessage(chatId, `Error restarting: ${error.message}`); + } + }); + + // /status command - Show bot status + this.bot.onText(/\/status/, async (msg) => { + const chatId = msg.chat.id; + + // Only allow admin chat + if (chatId.toString() !== config.telegram.chatId) { + await this.bot.sendMessage(chatId, 'Unauthorized access'); + return; + } + + try { + await this.handleStatusCommand(chatId); + } catch (error) { + console.error('Error handling status command:', error); + await this.bot.sendMessage(chatId, `Error getting status: ${error.message}`); + } + }); + + // /help command - Show available commands + this.bot.onText(/\/help/, async (msg) => { + const chatId = msg.chat.id; + + // Only allow admin chat + if (chatId.toString() !== config.telegram.chatId) { + await this.bot.sendMessage(chatId, 'Unauthorized access'); + return; + } + + await this.handleHelpCommand(chatId); + }); + } + + async handleLogCommand(chatId, logType) { + const logTypes = { + 'activity': { + file: './logs/debug.log', + name: 'activity_log.txt', + description: 'Full activity log' + }, + 'errors': { + file: './logs/errors.log', + name: 'error_log.txt', + description: 'Error log' + }, + 'articles': { + file: './logs/new_articles.log', + name: 'published_articles.txt', + description: 'Published articles log' + }, + 'fetch': { + file: './logs/monitor/fetch_monitor.log', + name: 'fetch_monitor.txt', + description: 'Article fetch monitoring' + }, + 'parse': { + file: './logs/monitor/parse_monitor.log', + name: 'parse_monitor.txt', + description: 'Article parse monitoring' + }, + 'ai': { + file: './logs/monitor/ai_monitor.log', 
+ name: 'ai_monitor.txt', + description: 'AI processing monitoring' + }, + 'telegram': { + file: './logs/monitor/telegram_monitor.log', + name: 'telegram_monitor.txt', + description: 'Telegram message monitoring' + }, + 'memory': { + file: './logs/monitor/memory_monitor.log', + name: 'memory_monitor.txt', + description: 'Memory usage monitoring' + }, + 'performance': { + file: './logs/monitor/performance_monitor.log', + name: 'performance_monitor.txt', + description: 'Performance monitoring' + } + }; + + if (logType === 'help' || logType === 'list') { + const helpText = `Available Log Types:\n\n` + + Object.entries(logTypes).map(([key, info]) => + `• /log ${key} - ${info.description}` + ).join('\n') + + `\n\nUsage: /log \nExample: /log activity`; + + await this.bot.sendMessage(chatId, helpText); + return; + } + + const logConfig = logTypes[logType]; + if (!logConfig) { + const availableTypes = Object.keys(logTypes).join(', '); + await this.bot.sendMessage(chatId, + `Unknown log type: ${logType}\n\n` + + `Available types: ${availableTypes}\n\n` + + `Use /log help to see descriptions.` + ); + return; + } + + const logPath = path.resolve(logConfig.file); + + if (!fs.existsSync(logPath)) { + await this.bot.sendMessage(chatId, `Log file not found: ${logConfig.description}`); + return; + } + + const stats = fs.statSync(logPath); + const fileSizeMB = (stats.size / 1024 / 1024).toFixed(2); + + // Check file size (Telegram has a 50MB limit for documents) + if (stats.size > 50 * 1024 * 1024) { + await this.bot.sendMessage(chatId, + `Log file too large (${fileSizeMB}MB). ` + + `Sending last 1000 lines instead...` + ); + + // Send last 1000 lines as text + const content = fs.readFileSync(logPath, 'utf8'); + const lines = content.split('\n').slice(-1000).join('\n'); + const truncatedContent = lines.length > 4000 ? 
+ '...(truncated)\n' + lines.slice(-4000) : lines; + + await this.bot.sendMessage(chatId, + `${logConfig.description} (last 1000 lines)\n\n` + + `\`\`\`\n${truncatedContent}\n\`\`\``, + { parse_mode: 'Markdown' } + ); + return; + } + + // Send the log file as document + await this.bot.sendMessage(chatId, + `Sending ${logConfig.description} (${fileSizeMB}MB)...` + ); + + await this.bot.sendDocument(chatId, logPath, { + caption: `${logConfig.description}\nFile size: ${fileSizeMB}MB` + }, { + filename: logConfig.name, + contentType: 'text/plain' + }); + } + + async handleStopCommand(chatId) { + await this.bot.sendMessage(chatId, 'Shutting down bot and all processes...'); + + // Stop the main process if running + if (this.mainProcess) { + this.mainProcess.kill('SIGTERM'); + this.mainProcess = null; + } + + // Log the shutdown + monitor.logPerformance('Bot', { + action: 'shutdown', + timestamp: new Date().toISOString(), + reason: 'manual_stop_command' + }); + + // Stop bot polling + await this.bot.stopPolling(); + + console.log('Bot shutdown initiated by /stop command'); + + // Exit the process + setTimeout(() => { + process.exit(0); + }, 1000); + } + + async handleRerunCommand(chatId) { + await this.bot.sendMessage(chatId, 'Restarting news aggregation process...'); + + // Stop current main process if running + if (this.mainProcess) { + this.mainProcess.kill('SIGTERM'); + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + // Log the restart + monitor.logPerformance('Bot', { + action: 'restart', + timestamp: new Date().toISOString(), + reason: 'manual_rerun_command' + }); + + try { + // Start new main process + this.mainProcess = spawn('node', ['main.js'], { + stdio: ['inherit', 'pipe', 'pipe'], + detached: false + }); + + let output = ''; + this.mainProcess.stdout.on('data', (data) => { + output += data.toString(); + console.log(`[Main Process] ${data.toString().trim()}`); + }); + + this.mainProcess.stderr.on('data', (data) => { + console.error(`[Main 
Process Error] ${data.toString().trim()}`); + }); + + this.mainProcess.on('close', (code) => { + console.log(`Main process exited with code ${code}`); + this.mainProcess = null; + }); + + // Wait a bit and report status + setTimeout(async () => { + if (this.mainProcess && !this.mainProcess.killed) { + await this.bot.sendMessage(chatId, 'News aggregation process restarted successfully'); + } else { + await this.bot.sendMessage(chatId, 'Failed to restart news aggregation process'); + } + }, 3000); + + } catch (error) { + await this.bot.sendMessage(chatId, `Error restarting process: ${error.message}`); + } + } + + async handleStatusCommand(chatId) { + const uptime = process.uptime(); + const uptimeHours = (uptime / 3600).toFixed(1); + const memory = process.memoryUsage(); + const memoryMB = (memory.heapUsed / 1024 / 1024).toFixed(1); + + // Check if main process is running + const mainStatus = this.mainProcess ? + (this.mainProcess.killed ? 'Stopped' : 'Running') : + 'Not started'; + + // Get recent activity from logs + let recentActivity = 'No recent activity'; + try { + const activityLog = path.resolve('./logs/debug.log'); + if (fs.existsSync(activityLog)) { + const content = fs.readFileSync(activityLog, 'utf8'); + const lines = content.split('\n').filter(line => line.trim()); + if (lines.length > 0) { + const lastLine = lines[lines.length - 1]; + recentActivity = lastLine.substring(0, 100) + '...'; + } + } + } catch (error) { + recentActivity = 'Error reading activity log'; + } + + const statusText = `Bot Status Report\n\n` + + `Uptime: ${uptimeHours} hours\n` + + `Memory: ${memoryMB} MB\n` + + `Main Process: ${mainStatus}\n\n` + + `Recent Activity:\n${recentActivity}\n\n` + + `Report Time: ${new Date().toLocaleString()}`; + + await this.bot.sendMessage(chatId, statusText); + } + + async handleHelpCommand(chatId) { + const helpText = `Telegram News Bot Commands\n\n` + + `Available Commands:\n\n` + + `• /log - Download log files\n` + + `• /log help - Show available 
log types\n` + + `• /status - Show bot and process status\n` + + `• /rerun - Restart news aggregation\n` + + `• /stop - Shutdown bot completely\n` + + `• /help - Show this help message\n\n` + + `Log Types:\n` + + `• activity - Full activity log\n` + + `• errors - Error messages\n` + + `• articles - Published articles\n` + + `• fetch, parse, ai, telegram - Detailed monitoring\n\n` + + `Examples:\n` + + `• /log activity - Download activity log\n` + + `• /status - Check system status\n` + + `• /rerun - Restart if stuck`; + + await this.bot.sendMessage(chatId, helpText); + } + + // Method to start main process from external call + startMainProcess() { + if (this.mainProcess) { + console.log('Main process already running'); + return; + } + + try { + this.mainProcess = spawn('node', ['main.js'], { + stdio: ['inherit', 'pipe', 'pipe'], + detached: false + }); + + this.mainProcess.stdout.on('data', (data) => { + console.log(`[Main Process] ${data.toString().trim()}`); + }); + + this.mainProcess.stderr.on('data', (data) => { + console.error(`[Main Process Error] ${data.toString().trim()}`); + }); + + this.mainProcess.on('close', (code) => { + console.log(`Main process exited with code ${code}`); + this.mainProcess = null; + }); + + console.log('Main process started'); + } catch (error) { + console.error('Error starting main process:', error); + } + } + + // Method to stop the bot gracefully + async stop() { + if (this.mainProcess) { + this.mainProcess.kill('SIGTERM'); + } + await this.bot.stopPolling(); + } +} + +export { BotController }; diff --git a/consortiumnews/main.js b/consortiumnews/main.js index d27e807..d5a6ed5 100644 --- a/consortiumnews/main.js +++ b/consortiumnews/main.js @@ -20,12 +20,12 @@ async function sleep(ms) { } function showFetchingAnimation() { - const frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; + const frames = ['.', '..', '...', '....']; let i = 0; const interval = setInterval(() => { - process.stdout.write(`\r${frames[i]} Fetching 
new ConsortiumNews articles...`); + process.stdout.write(`\rFetching new ConsortiumNews articles${frames[i]}`); i = (i + 1) % frames.length; - }, 80); + }, 500); return interval; } @@ -187,7 +187,7 @@ async function checkForNewArticles() { } } -async function main() { +export async function main() { console.log('Starting ConsortiumNews monitoring...'); // Load previous URLs from file @@ -227,8 +227,14 @@ async function main() { } } -main().catch(error => { - logError(`Fatal error in ConsortiumNews main script: ${error.message}`); - console.error('Fatal error:', error); - process.exit(1); -}); \ No newline at end of file +// Export the functions for external use +export { processLatestArticle, checkForNewArticles, loadPreviousUrls, savePreviousUrls }; + +// Only run the main function if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + main().catch(error => { + logError(`Fatal error in ConsortiumNews main script: ${error.message}`); + console.error('Fatal error:', error); + process.exit(1); + }); +} \ No newline at end of file diff --git a/main.js b/main.js new file mode 100644 index 0000000..8029b65 --- /dev/null +++ b/main.js @@ -0,0 +1,169 @@ +import { + processLatestArticle as ppProcessLatest, + checkForNewArticles as ppCheckNew, + loadPreviousUrls as ppLoadUrls, + savePreviousUrls as ppSaveUrls +} from './ProPublica/main.js'; +import { + processLatestArticle as cnProcessLatest, + checkForNewArticles as cnCheckNew, + loadPreviousUrls as cnLoadUrls, + savePreviousUrls as cnSaveUrls +} from './consortiumnews/main.js'; +import { BotController } from './bot_controller.js'; +import { monitor } from './utils/monitoring.js'; +import { logTerminal, logError } from './utils/logger.js'; + +// Global bot controller instance +let botController = null; + +async function initializeNewsSources() { + try { + console.log('Initializing news sources...'); + + // Load previous URLs for both sources + console.log('Loading ProPublica previous 
URLs...'); + await ppLoadUrls(); + + console.log('Loading ConsortiumNews previous URLs...'); + await cnLoadUrls(); + + console.log('News sources initialized successfully'); + } catch (error) { + console.error('Error initializing news sources:', error); + throw error; + } +} + +async function runNewsSources() { + try { + logTerminal('Starting news source aggregation...'); + console.log('Starting news source aggregation...'); + + // Run both news sources sequentially to avoid conflicts + console.log('Checking ProPublica for new articles...'); + const ppResults = await ppCheckNew(); + + console.log('Checking ConsortiumNews for new articles...'); + const cnResults = await cnCheckNew(); + + const totalFound = (ppResults?.length || 0) + (cnResults?.length || 0); + const successMessage = `Successfully processed ${totalFound} new articles (ProPublica: ${ppResults?.length || 0}, ConsortiumNews: ${cnResults?.length || 0})`; + logTerminal(successMessage); + console.log(successMessage); + + // Log performance + monitor.logPerformance('Main', { + action: 'cycle_complete', + timestamp: new Date().toISOString(), + sources: ['ProPublica', 'ConsortiumNews'], + articlesFound: totalFound + }); + + } catch (error) { + const errorMessage = `Error running news sources: ${error.message}`; + logError(errorMessage); + console.error(errorMessage); + throw error; + } +} + +async function startBot() { + try { + console.log('Starting enhanced Telegram bot...'); + botController = new BotController(); + + // Log bot startup + monitor.logPerformance('Bot', { + action: 'startup', + timestamp: new Date().toISOString(), + features: ['log_download', 'stop_command', 'rerun_command', 'status_monitoring'] + }); + + console.log('Enhanced Telegram bot started successfully'); + return botController; + } catch (error) { + const errorMessage = `Error starting bot: ${error.message}`; + logError(errorMessage); + console.error(errorMessage); + throw error; + } +} + +async function runContinuousProcess() { + 
const INTERVAL_MINUTES = 30; // Run every 30 minutes + const INTERVAL_MS = INTERVAL_MINUTES * 60 * 1000; + + console.log(`Starting continuous news aggregation (every ${INTERVAL_MINUTES} minutes)`); + + // Initialize news sources first + await initializeNewsSources(); + + // Run immediately on startup + await runNewsSources(); + + // Then run on interval + setInterval(async () => { + try { + console.log(`\nRunning scheduled news check (${new Date().toLocaleString()})`); + await runNewsSources(); + } catch (error) { + console.error('Error in scheduled run:', error); + logError(`Scheduled run error: ${error.message}`); + } + }, INTERVAL_MS); +} + +async function main() { + try { + console.log('Starting Enhanced Telegram News Bot'); + console.log('====================================='); + + // Start the bot controller first + await startBot(); + + // Run continuous process + await runContinuousProcess(); + + } catch (error) { + console.error('Fatal error in main process:', error); + logError(`Fatal error in main: ${error.message}`); + process.exit(1); + } +} + +// Handle graceful shutdown +process.on('SIGTERM', async () => { + console.log('Received SIGTERM, shutting down gracefully...'); + if (botController) { + await botController.stop(); + } + process.exit(0); +}); + +process.on('SIGINT', async () => { + console.log('Received SIGINT, shutting down gracefully...'); + if (botController) { + await botController.stop(); + } + process.exit(0); +}); + +// Handle uncaught exceptions +process.on('uncaughtException', (error) => { + console.error('Uncaught Exception:', error); + logError(`Uncaught Exception: ${error.message}\nStack: ${error.stack}`); + process.exit(1); +}); + +process.on('unhandledRejection', (reason, promise) => { + console.error('Unhandled Rejection at:', promise, 'reason:', reason); + logError(`Unhandled Rejection: ${reason}`); + process.exit(1); +}); + +// Export for external use +export { runNewsSources, initializeNewsSources, startBot, botController }; 
+ +// Run the main function +main(); \ No newline at end of file diff --git a/package.json b/package.json index ec7a2e0..cd6b53c 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,9 @@ { "type": "module", + "scripts": { + "start": "node main.js", + "dev": "node main.js" + }, "dependencies": { "@google/genai": "^1.0.1", "@xenova/transformers": "^2.17.2", diff --git a/previous_urls.json b/previous_urls.json index 7f31e13..e93f478 100644 --- a/previous_urls.json +++ b/previous_urls.json @@ -21,5 +21,7 @@ "https://consortiumnews.com/2025/06/03/russia-at-a-crossroads/", "https://consortiumnews.com/2025/06/03/russia-at-a-crossroads/", "https://www.propublica.org/article/investigation-ramp-gsa-smartpay-trump-peter-thiel-gerald-connolly", - "https://www.propublica.org/article/tribal-colleges-universities-trump-cuts-funding" + "https://www.propublica.org/article/tribal-colleges-universities-trump-cuts-funding", + "https://www.propublica.org/article/richard-l-bean-tennessee-detention-center-board", + "https://www.propublica.org/article/richard-l-bean-tennessee-detention-center-board" ] \ No newline at end of file diff --git a/sender.js b/sender.js index 9dded13..867bc52 100644 --- a/sender.js +++ b/sender.js @@ -39,13 +39,13 @@ async function sendWithRetry(sendFunction, maxRetries = 3) { // Function to send message with a single image while preserving Telegram's native discussion feature export async function sendNewsMessage(text, imageUrl = null, url = null) { try { - console.log('🔄 Preparing to send message to Telegram...'); + console.log('Preparing to send message to Telegram...'); console.log('Message text:', text?.substring(0, 100) + '...'); console.log('Image URL:', imageUrl || 'None'); console.log('Article URL:', url || 'None'); // Add the URL to the text if provided - const messageText = url ? `${text}\n\n🌐 View Full Article` : text; + const messageText = url ? 
`${text}\n\n 🌐View Full Article` : text; let success = false; if (imageUrl) { @@ -70,9 +70,9 @@ export async function sendNewsMessage(text, imageUrl = null, url = null) { throw new Error(`Invalid content type: ${contentType}`); } - console.log('✅ Image validation passed'); + console.log('Image validation passed'); } catch (error) { - console.error('❌ Image validation failed:', error.message); + console.error('Image validation failed:', error.message); // Fall back to sending text-only message imageUrl = null; } @@ -82,7 +82,7 @@ export async function sendNewsMessage(text, imageUrl = null, url = null) { // Send a single photo with caption success = await sendWithRetry(async () => { try { - console.log('📤 Preparing to send photo message to Telegram...'); + console.log('Preparing to send photo message to Telegram...'); console.log('Chat ID:', config.telegram.chatId); console.log('Image URL:', imageUrl); @@ -94,7 +94,7 @@ export async function sendNewsMessage(text, imageUrl = null, url = null) { parse_mode: 'HTML', disable_web_page_preview: true }); - console.log('✅ Photo message sent successfully via URL'); + console.log(' Photo message sent successfully via URL'); return; } catch (urlError) { console.log('Direct URL send failed, falling back to buffer method:', urlError.message); @@ -117,12 +117,12 @@ export async function sendNewsMessage(text, imageUrl = null, url = null) { parse_mode: 'HTML', disable_web_page_preview: true }); - console.log('✅ Photo message sent successfully via buffer'); + console.log('Photo message sent successfully via buffer'); } catch (bufferError) { - console.error('❌ Error sending photo via buffer:', bufferError.message); + console.error(' Error sending photo via buffer:', bufferError.message); console.error('Buffer Error details:', bufferError.response?.body || bufferError.stack || 'No additional details'); // Fall back to text-only message with image link - console.log('↪️ Falling back to text-only message'); + console.log('Falling back to 
text-only message'); await bot.sendMessage(config.telegram.chatId, `${messageText}\n\n[Image available at: ${imageUrl}]`, { parse_mode: 'HTML', disable_web_page_preview: false // Allow preview for image URL @@ -130,11 +130,11 @@ export async function sendNewsMessage(text, imageUrl = null, url = null) { } } catch (error) { - console.error('❌ Error sending photo:', error); + console.error(' Error sending photo:', error); console.error('Error details:', error.response?.body || 'No additional details'); // Fall back to text-only message with image link - console.log('↪️ Falling back to text-only message'); + console.log(' Falling back to text-only message'); await bot.sendMessage(config.telegram.chatId, `${messageText}\n\n[Image available at: ${imageUrl}]`, { parse_mode: 'HTML', disable_web_page_preview: false // Allow preview for image URL diff --git a/test_article_images.js b/test_article_images.js deleted file mode 100644 index d605d78..0000000 --- a/test_article_images.js +++ /dev/null @@ -1,213 +0,0 @@ -// ES Module imports -import { config } from 'dotenv'; -// Configure environment variables -config(); - -import { parseArticle as parseProPublica } from './ProPublica/GetArticleinfo.js'; -import { parseArticle as parseTruthout } from './truthout/GetArticleinfo.js'; -import { parseArticle as parseConsortiumNews } from './consortiumnews/GetArticleinfo.js'; -import { url_filtering as filterTruthout } from './truthout/url_filtering.js'; -import { url_filtering as filterProPublica } from './ProPublica/url_filtering.js'; -import { url_filtering as filterConsortiumNews } from './consortiumnews/url_filtering.js'; -import fetch from 'node-fetch'; - -async function getLatestArticles(filterFn, count = 2) { - try { - const urls = await filterFn(true); // Pass true for startup phase - if (urls && urls.length > 0) { - return urls.slice(0, count); // Get the first 'count' articles - } - } catch (err) { - console.error('Error fetching articles:', err); - } - return []; -} - -async 
function validateImage(imgUrl) { - try { - const response = await fetch(imgUrl, { method: 'HEAD' }); - if (!response.ok) { - console.error(`❌ Image not accessible: ${imgUrl} (${response.status})`); - return false; - } - const contentType = response.headers.get('content-type'); - if (!contentType?.startsWith('image/')) { - console.error(`❌ Invalid content type: ${imgUrl} (${contentType})`); - return false; - } - console.log(`✅ Image OK: ${imgUrl} (${contentType})`); - return true; - } catch (err) { - console.error(`❌ Failed to validate image ${imgUrl}:`, err.message); - return false; - } -} - -async function testPreviewImages(filterFn, sourceName) { - console.log(`\n=== Testing ${sourceName} Preview Images ===`); - try { - const articles = await filterFn(true); - if (!articles?.length) { - console.log(`No ${sourceName} articles found`); - return; - } - - console.log(`Found ${articles.length} articles`); - for (const article of articles.slice(0, 2)) { - console.log(`\nArticle: ${article.title}`); - console.log(`URL: ${article.url}`); - - if (article.previewImage) { - console.log('Preview Image:', article.previewImage); - await validateImage(article.previewImage); - } else { - console.log('❌ No preview image found'); - } - } - } catch (err) { - console.error(`Error testing ${sourceName}:`, err); - } -} - -async function testArticleContent(parser, article, sourceName) { - try { - console.log('\nFetching article content...'); - const content = await parser(article.url); - if (!content) { - console.log('❌ Failed to parse article content'); - return; - } - - console.log('✅ Article content retrieved'); - console.log('Title:', content.title); - console.log('Content length:', content.body.length, 'paragraphs'); - - // Compare preview image with article images - if (article.previewImage) { - console.log('\nComparing preview image with article images:'); - console.log('Preview:', article.previewImage); - console.log('Article images:', content.heroImages); - } - } catch (err) { 
- console.error(`Error testing ${sourceName} article content:`, err); - } -} - -async function testImageHandling() { - console.log('🔍 Starting image handling test...\n'); - - // Test each news source - const sources = [ - { name: 'ProPublica', filter: filterProPublica, parser: parseProPublica }, - { name: 'Truthout', filter: filterTruthout, parser: parseTruthout }, - { name: 'ConsortiumNews', filter: filterConsortiumNews, parser: parseConsortiumNews } - ]; - - for (const source of sources) { - // Test preview images from listings - await testPreviewImages(source.filter, source.name); - - // Get latest articles for content testing - const articles = await source.filter(true); - if (articles?.length) { - // Test full article content for the first article - await testArticleContent(source.parser, articles[0], source.name); - } - } - - console.log('\n✨ Image handling test complete!'); - - // Test Truthout - console.log('\n=== Truthout ==='); - if (testUrls.truthout.length === 0) { - console.log('No Truthout articles found to test'); - } - for (const url of testUrls.truthout) { - try { - console.log('\nTesting URL:', url); - const article = await parseTruthout(url); - if (article) { - console.log('Title:', article.title); - console.log('Found images:', article.heroImages.length); - if (article.heroImages.length > 0) { - console.log('Image URLs:'); - article.heroImages.forEach((img, i) => console.log(`${i + 1}. 
${img}`)); - - // Validate images - console.log('\nValidating images...'); - for (const imgUrl of article.heroImages) { - try { - const response = await fetch(imgUrl, { method: 'HEAD' }); - if (!response.ok) { - console.error(`❌ Image not accessible: ${imgUrl} (${response.status})`); - } else { - const contentType = response.headers.get('content-type'); - console.log(`✅ Image OK: ${imgUrl} (${contentType})`); - } - } catch (err) { - console.error(`❌ Failed to validate image ${imgUrl}:`, err.message); - } - } - } - } - } catch (err) { - console.error('Error processing Truthout article:', err.message); - } - } - - // Test ConsortiumNews - console.log('\n=== ConsortiumNews ==='); - if (testUrls.consortiumnews.length === 0) { - console.log('No ConsortiumNews articles found to test'); - } - for (const url of testUrls.consortiumnews) { - try { - console.log('\nTesting URL:', url); - const article = await parseConsortiumNews(url); - if (article) { - console.log('Title:', article.title); - console.log('Found images:', article.heroImages.length); - if (article.heroImages.length > 0) { - console.log('Image URLs:'); - article.heroImages.forEach((img, i) => console.log(`${i + 1}. 
${img}`)); - - // Validate images - console.log('\nValidating images...'); - for (const imgUrl of article.heroImages) { - try { - const response = await fetch(imgUrl, { method: 'HEAD' }); - if (!response.ok) { - console.error(`❌ Image not accessible: ${imgUrl} (${response.status})`); - } else { - const contentType = response.headers.get('content-type'); - console.log(`✅ Image OK: ${imgUrl} (${contentType})`); - } - } catch (err) { - console.error(`❌ Failed to validate image ${imgUrl}:`, err.message); - } - } - } - } - } catch (err) { - console.error('Error processing ConsortiumNews article:', err.message); - } - } - - console.log('\n✨ Image handling test complete!'); -} - -// Set up error handlers -process.on('uncaughtException', (err) => { - console.error('Uncaught Exception:', err); -}); - -process.on('unhandledRejection', (reason, promise) => { - console.error('Unhandled Rejection at:', promise, 'reason:', reason); -}); - -// Run the test -console.log('Starting test...'); -testImageHandling().catch(err => { - console.error('Test failed:', err); - process.exit(1); -}); diff --git a/test_image_send.js b/test_image_send.js deleted file mode 100644 index 2625126..0000000 --- a/test_image_send.js +++ /dev/null @@ -1,175 +0,0 @@ -import { sendNewsMessage } from './sender.js'; -import { config } from './utils/config.js'; -import { monitor } from './utils/monitoring.js'; -import { url_filtering as proPublicaUrlFiltering } from './ProPublica/url_filtering.js'; -import { url_filtering as consortiumUrlFiltering } from './consortiumnews/url_filtering.js'; -import { GetLatestArticle } from './consortiumnews/GetArticleinfo.js'; -import { AI_message_Gen } from './consortiumnews/AI-messageGen.js'; -import { GoogleGenAI } from "@google/genai"; -import fetch from "node-fetch"; -import cheerio from "cheerio"; - -const ai = new GoogleGenAI({ apiKey: config.google.apiKey }); - -/** - * Fetches a ProPublica article URL, extracts its body paragraphs, and uses Gemini to generate a 
short summary. - * Returns the trimmed summary text or null if something fails. - */ -async function summarizeProPublicaArticle(url) { - let html; - try { - html = await fetch(url).then(res => res.text()); - } catch (err) { - console.error('❌ Error fetching ProPublica article HTML:', err); - return null; - } - - const $ = cheerio.load(html); - let paragraphs = []; - if ($("article .article-body p").length) { - paragraphs = $("article .article-body p").toArray().map(el => $(el).text().trim()); - } else if ($(".body-content p").length) { - paragraphs = $(".body-content p").toArray().map(el => $(el).text().trim()); - } else if ($(".story-body p").length) { - paragraphs = $(".story-body p").toArray().map(el => $(el).text().trim()); - } - - if (!paragraphs.length) { - console.warn('⚠️ No body paragraphs found to summarize for ProPublica'); - return null; - } - - const basePrompt = ` -You are a Telegram news channel editor bot. Below is the article body as an array of paragraphs. Produce a concise 50–70 word summary (3–4 sentences max) highlighting who, what, where, and why. Omit commentary. End with a "read more" call-to-action. 
- -Here is the body: -`; - const prompt = basePrompt + paragraphs.join("\n\n"); - - try { - const response = await ai.models.generateContent({ - model: "gemini-2.0-flash", - contents: prompt - }); - return response.text.trim(); - } catch (err) { - console.error('❌ Error generating ProPublica summary:', err); - return null; - } -} - -async function testTelegram() { - console.log('🔄 Testing Telegram configuration and full pipeline…'); - - // 1) Check environment variables - console.log('🔍 Verifying environment variables...'); - if (!process.env.TELEGRAM_BOT_TOKEN || !process.env.TELEGRAM_CHAT_ID) { - throw new Error('Missing required Telegram environment variables'); - } - console.log('✅ Environment variables present'); - - // - // ProPublica: fetch latest, summarize, then send via AI - // - console.log('\n📥 Fetching latest ProPublica article…'); - let propubArticles; - try { - propubArticles = await proPublicaUrlFiltering(false); - } catch (err) { - console.error('❌ Error in ProPublica url_filtering:', err); - propubArticles = []; - } - - if (!propubArticles.length) { - console.log('⚠️ No recent ProPublica articles found'); - } else { - const latestPropub = propubArticles[0]; - console.log('✅ Latest ProPublica URL:', latestPropub.url); - - console.log('🔄 Summarizing ProPublica article via AI…'); - const propubSummary = await summarizeProPublicaArticle(latestPropub.url); - if (!propubSummary) { - console.log('⚠️ Skipping ProPublica send; summarization failed'); - } else { - const propubText = `📰 *ProPublica*\n${propubSummary}\n\n[Read more ›](${latestPropub.url})`; - const propubImage = latestPropub.previewImage || null; - - console.log('\n📤 Sending ProPublica AI-generated message...'); - try { - await sendNewsMessage(propubText, propubImage, null); - console.log('✅ ProPublica message sent successfully'); - } catch (error) { - console.error('❌ Error sending ProPublica message:', error); - } - } - } - - // - // ConsortiumNews: fetch latest, AI summary, then send - // 
- console.log('\n📥 Fetching latest ConsortiumNews article…'); - let consArticles; - try { - consArticles = await consortiumUrlFiltering(false); - } catch (err) { - console.error('❌ Error in ConsortiumNews url_filtering:', err); - consArticles = []; - } - - if (!consArticles.length) { - console.log('⚠️ No recent ConsortiumNews articles found'); - return; - } - - console.log(`✅ Found ${consArticles.length} ConsortiumNews articles, picking latest…`); - let consArticle; - try { - consArticle = await GetLatestArticle(consArticles); - } catch (err) { - console.error('❌ Error in GetLatestArticle:', err); - consArticle = null; - } - - if (!consArticle) { - console.log('⚠️ Could not retrieve detailed ConsortiumNews article'); - return; - } - - console.log('✅ Latest ConsortiumNews URL:', consArticle.url); - console.log('🔄 Generating AI summary for ConsortiumNews article…'); - - let aiResult; - try { - aiResult = await AI_message_Gen(false); - } catch (err) { - console.error('❌ Error generating AI summary for ConsortiumNews:', err); - aiResult = null; - } - - if (!aiResult) { - console.log('⚠️ AI summary for ConsortiumNews failed or returned null'); - return; - } - - const consText = `🗞️ *ConsortiumNews*\n${aiResult.summary}\n\n[Read more ›](${consArticle.url})`; - const consImage = aiResult.heroImage || null; - - console.log('\n📤 Sending ConsortiumNews AI-generated message...'); - try { - await sendNewsMessage(consText, consImage, null); - console.log('✅ ConsortiumNews message sent successfully'); - } catch (error) { - console.error('❌ Error sending ConsortiumNews message:', error); - } -} - -// Run the Telegram test sequence -console.log('🎬 Starting Telegram test sequence...'); -testTelegram() - .then(() => { - console.log('\n✨ All Telegram tests completed!'); - }) - .catch(err => { - console.error('Fatal error in Telegram tests:', err); - process.exit(1); - }); diff --git a/test_telegram.js b/test_telegram.js deleted file mode 100644 index e17e63b..0000000 --- 
a/test_telegram.js +++ /dev/null @@ -1,115 +0,0 @@ -import { sendNewsMessage } from './sender.js'; -import { config } from './utils/config.js'; -import { monitor } from './utils/monitoring.js'; -import { url_filtering as proPublicaUrlFiltering } from './ProPublica/url_filtering.js'; -import { url_filtering as consortiumUrlFiltering } from './consortiumnews/url_filtering.js'; -import { GetLatestArticle } from './consortiumnews/GetArticleinfo.js'; -import { AI_message_Gen } from './consortiumnews/AI-messageGen.js'; - -async function testTelegram() { - console.log('🔄 Testing Telegram configuration and full pipeline…'); - - // Verify environment variables - console.log('🔍 Checking environment variables...'); - if (!process.env.TELEGRAM_BOT_TOKEN || !process.env.TELEGRAM_CHAT_ID) { - throw new Error('Missing required Telegram environment variables'); - } - console.log('✅ Environment variables present'); - - // 1) Fetch latest ProPublica article - console.log('\n📥 Fetching latest ProPublica article…'); - let propubArticles; - try { - propubArticles = await proPublicaUrlFiltering(false); - } catch (err) { - console.error('❌ Error in ProPublica url_filtering:', err); - propubArticles = []; - } - - if (propubArticles.length === 0) { - console.log('⚠️ No recent ProPublica articles found'); - } else { - const latestPropub = propubArticles[0]; - console.log('✅ Latest ProPublica article URL:', latestPropub.url); - - // Send ProPublica message: include title, URL, and preview image if available - const propubText = `📰 *ProPublica*\n[${latestPropub.title}](${latestPropub.url})`; - const propubImage = latestPropub.previewImage || null; - - console.log('\n📤 Sending ProPublica test message...'); - try { - await sendNewsMessage(propubText, propubImage, null); - console.log('✅ ProPublica test message sent successfully'); - } catch (error) { - console.error('❌ Error sending ProPublica message:', error); - } - } - - // 2) Fetch latest ConsortiumNews article and generate AI summary - 
console.log('\n📥 Fetching latest ConsortiumNews article…'); - let consArticles; - try { - consArticles = await consortiumUrlFiltering(false); - } catch (err) { - console.error('❌ Error in ConsortiumNews url_filtering:', err); - consArticles = []; - } - - if (consArticles.length === 0) { - console.log('⚠️ No recent ConsortiumNews articles found'); - return; - } - - console.log(`✅ Found ${consArticles.length} ConsortiumNews articles, picking latest…`); - let consArticle; - try { - consArticle = await GetLatestArticle(consArticles); - } catch (err) { - console.error('❌ Error in GetLatestArticle:', err); - consArticle = null; - } - - if (!consArticle) { - console.log('⚠️ Could not retrieve detailed ConsortiumNews article'); - return; - } - - console.log('✅ Latest ConsortiumNews URL:', consArticle.url); - console.log('🔄 Generating AI summary for ConsortiumNews article…'); - - let aiResult; - try { - aiResult = await AI_message_Gen(false); - } catch (err) { - console.error('❌ Error generating AI summary:', err); - aiResult = null; - } - - if (!aiResult) { - console.log('⚠️ AI summary failed or returned null'); - return; - } - - // Construct Telegram message: AI summary + “read more” link + hero image - const consText = `🗞️ *ConsortiumNews*\n${aiResult.summary}\n\n[Read more ›](${consArticle.url})`; - const consImage = aiResult.heroImage || null; - - console.log('\n📤 Sending ConsortiumNews test message...'); - try { - await sendNewsMessage(consText, consImage, null); - console.log('✅ ConsortiumNews test message sent successfully'); - } catch (error) { - console.error('❌ Error sending ConsortiumNews message:', error); - } -} - -// Run the Telegram test -console.log('🎬 Starting Telegram test sequence...'); -testTelegram() - .then(() => { - console.log('\n✨ All Telegram tests completed!'); - }) - .catch(err => { - console.error('Fatal error in Telegram tests:', err); - process.exit(1); - }); diff --git a/utils/image_handler.js b/utils/image_handler.js index 3e77e0a..1244c2f 
100644 --- a/utils/image_handler.js +++ b/utils/image_handler.js @@ -169,7 +169,8 @@ function isValidArticleImage(imgSrc, hostname) { // Handle ProPublica images if (hostname.includes('propublica.org')) { return (url.hostname.includes('propublica.org') || - url.hostname.includes('assets-c3.propublica.org')) && + url.hostname.includes('assets-c3.propublica.org') || + url.hostname.includes('img.assets-d.propublica.org')) && !url.pathname.includes('avatar'); }