'use strict';
|
|
|
|
/**
|
|
* Orphaned Application Details Cleanup Worker
|
|
*
|
|
* This script identifies and removes application detail records where the referenced fileId
|
|
* no longer exists in the AppFile collection. These orphaned records can accumulate over time
|
|
* when application files are deleted but their corresponding detail records remain.
|
|
*
|
|
* Key Features:
|
|
* - Identifies orphaned application details by checking fileId references
|
|
* - Uses in-memory caching of AppFile IDs for fast lookup performance
|
|
* - Time-based processing (yearly/monthly periods) for handling billion+ documents
|
|
* - Efficient ObjectId timestamp filtering for date ranges
|
|
* - OPTIMIZED: Skips expensive countDocuments() calls that scan billions of records
|
|
* - Progressive counting and early termination for empty periods
|
|
* - Batch processing with configurable batch sizes for large datasets
|
|
* - Bulk delete operations for efficient cleanup
|
|
* - Comprehensive progress tracking per time period and overall
|
|
* - Supports dry-run mode for safe testing
|
|
* - Implements robust error handling with retry logic
|
|
* - Follows the same database connection pattern as other worker scripts
|
|
*
|
|
* Performance Optimizations (Nov 2024):
|
|
* - Eliminated countDocuments() calls that were scanning 1+ billion records per time period
|
|
* - Added quick existence checks to skip empty periods
|
|
* - Progressive counting shows processing rate instead of percentage
|
|
* - Configurable counting strategies: skip (default), estimate, or full
|
|
*
|
|
* Usage:
|
|
* # Check and remove orphaned application details for all years (2020-2025)
|
|
* DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* # Run with specific environment file (loads all variables from the file)
|
|
* set -a && source environment.env && set +a && DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js
|
|
* set -a && source environment_prod.env && set +a && DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* # Run with dotenv (if your project uses it)
|
|
* DOTENV_CONFIG_PATH=environment.env DEBUG=agm:clean-orphaned-details node -r dotenv/config server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* # Process only a specific year using command line argument
|
|
* DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js --specific-year=2024
|
|
*
|
|
* # Process a range of years using command line arguments
|
|
* DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js --start-year=2022 --end-year=2024
|
|
*
|
|
* # Process a specific date range using command line arguments
|
|
* DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js --start-date=2024-06-01 --end-date=2024-06-30
|
|
*
|
|
* # Process from a specific date to now
|
|
* DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js --start-date=2024-01-15
|
|
*
|
|
* # Process with ISO datetime format
|
|
* DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js --start-date=2024-06-01T10:30:00Z --end-date=2024-06-15T15:45:00Z
|
|
*
|
|
* # Fast execution (default - skips expensive counting)
|
|
* DEBUG=agm:clean-orphaned-details COUNTING_STRATEGY=skip node server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* # With estimation for progress tracking
|
|
* DEBUG=agm:clean-orphaned-details COUNTING_STRATEGY=estimate node server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* # Dry run mode with command line arguments
|
|
* DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js --dry-run --start-year=2025
|
|
*
|
|
* # Check only mode with command line arguments
|
|
* DEBUG=agm:clean-orphaned-details node server/workers/cleanOrphanedAppDetails.js --check-only --specific-year=2024
|
|
*
|
|
* # Silent mode - only show output when orphans are found
|
|
* DEBUG=agm:clean-orphaned-details AGM_SILENT=true node server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* # Write statistics to custom file
|
|
* DEBUG=agm:clean-orphaned-details AGM_STATS_FILE=./results/cleanup-2024.json node server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* # For large datasets (billion+ records), use memory management flags
|
|
 * DEBUG=agm:clean-orphaned-details node --expose-gc --max-old-space-size=8192 server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* # Using environment variables (legacy method)
|
|
* DEBUG=agm:clean-orphaned-details SPECIFIC_YEAR=2024 node server/workers/cleanOrphanedAppDetails.js
|
|
* DEBUG=agm:clean-orphaned-details START_YEAR=2022 END_YEAR=2024 node server/workers/cleanOrphanedAppDetails.js
|
|
* DEBUG=agm:clean-orphaned-details START_DATE=2024-06-01 END_DATE=2024-06-30 node server/workers/cleanOrphanedAppDetails.js
|
|
* DEBUG=agm:clean-orphaned-details DRY_RUN=true START_DATE=2024-01-01 node server/workers/cleanOrphanedAppDetails.js
|
|
*
|
|
* Command Line Arguments:
|
|
* --dry-run # Only reports what would be deleted without making changes
|
|
* --check-only # Only check for orphaned records without deleting
|
|
* --start-year=YYYY # Starting year for processing (default: 2020)
|
|
* --end-year=YYYY # Ending year for processing (default: current year)
|
|
* --specific-year=YYYY # Process only a specific year (overrides start/end year)
|
|
* --start-date=YYYY-MM-DD # Starting date for processing (YYYY-MM-DD or ISO format)
|
|
* --end-date=YYYY-MM-DD # Ending date for processing (YYYY-MM-DD or ISO format)
|
|
* --batch-size=N # Number of documents per batch (default: 1000)
|
|
* --counting-strategy=STRATEGY # skip, estimate, or full (default: skip)
|
|
*
|
|
* Environment Variables:
|
|
* - AGM_DRY_RUN=true # Only reports what would be deleted without making changes
|
|
* - AGM_BATCH_SIZE=1000 # Number of documents per batch (default: 1000)
|
|
* - AGM_MAX_RETRIES=3 # Maximum number of retries for errors (default: 3)
|
|
* - AGM_RETRY_DELAY=1000 # Base delay in ms between retries (default: 1000)
|
|
* - AGM_SHOW_PROGRESS=true # Whether to show progress indicator (default: true)
|
|
* - AGM_SILENT=true # Suppress progress output unless orphans are found (default: false)
|
|
* - AGM_CHECK_ONLY=false # Only check for orphaned records without deleting (default: false)
|
|
* - AGM_TIME_PERIOD=yearly # Time period for batching: yearly, monthly, or custom (default: yearly)
|
|
* - AGM_COUNTING_STRATEGY=skip # How to handle document counting: skip, estimate, or full (default: skip)
|
|
* - AGM_START_YEAR=2020 # Starting year for processing (default: 2020)
|
|
* - AGM_END_YEAR=2025 # Ending year for processing (default: current year)
|
|
* - AGM_SPECIFIC_YEAR=2024 # Process only a specific year (overrides START_YEAR/END_YEAR)
|
|
* - AGM_START_DATE=2024-01-01 # Starting date for processing (YYYY-MM-DD or ISO format)
|
|
* - AGM_END_DATE=2024-12-31 # Ending date for processing (YYYY-MM-DD or ISO format)
|
|
* - AGM_STATS_FILE=./cleanup-stats.json # File to write statistics results (default: ./cleanup-stats.json)
|
|
*/
|
|
|
|
const debug = require('debug')('agm:clean-orphaned-details');
|
|
const { DBConnection } = require('../helpers/db/connect.js');
|
|
const mongoose = require('mongoose');
|
|
const utils = require('../helpers/utils.js');
|
|
const AppDetail = require('../model/application_detail.js');
|
|
const AppFile = require('../model/application_file.js');
|
|
const fs = require('fs').promises;
|
|
const path = require('path');
|
|
|
|
/**
 * Parse command line arguments.
 *
 * Defaults come from environment variables (the AGM_*-prefixed names take
 * precedence over the legacy un-prefixed names); command line arguments
 * override both. `--start-year`, `--end-year`, `--start-date` and
 * `--end-date` each clear `specificYear`, since an explicit range overrides
 * a specific-year selection.
 *
 * @returns {Object} Parsed configuration object
 */
function parseArguments() {
  const args = process.argv.slice(2);

  // Everything after the FIRST '=' - unlike arg.split('=')[1], this does not
  // truncate values that themselves contain '=' (e.g. file paths).
  const argValue = (arg) => arg.slice(arg.indexOf('=') + 1);

  const config = {
    dryRun: process.env.AGM_DRY_RUN === 'true' || process.env.DRY_RUN === 'true',
    batchSize: parseInt(process.env.AGM_BATCH_SIZE || process.env.BATCH_SIZE || '1000', 10),
    maxRetries: parseInt(process.env.AGM_MAX_RETRIES || process.env.MAX_RETRIES || '3', 10),
    retryDelay: parseInt(process.env.AGM_RETRY_DELAY || process.env.RETRY_DELAY || '1000', 10),
    showProgress: (process.env.AGM_SHOW_PROGRESS || process.env.SHOW_PROGRESS || 'true') !== 'false',
    silent: process.env.AGM_SILENT === 'true' || process.env.SILENT === 'true',
    checkOnly: process.env.AGM_CHECK_ONLY === 'true' || process.env.CHECK_ONLY === 'true',
    timePeriod: process.env.AGM_TIME_PERIOD || process.env.TIME_PERIOD || 'yearly',
    countingStrategy: process.env.AGM_COUNTING_STRATEGY || process.env.COUNTING_STRATEGY || 'skip',
    startYear: parseInt(process.env.AGM_START_YEAR || process.env.START_YEAR || '2020', 10),
    endYear: parseInt(process.env.AGM_END_YEAR || process.env.END_YEAR || new Date().getFullYear().toString(), 10),
    specificYear: (process.env.AGM_SPECIFIC_YEAR || process.env.SPECIFIC_YEAR) ? parseInt(process.env.AGM_SPECIFIC_YEAR || process.env.SPECIFIC_YEAR, 10) : null,
    startDate: process.env.AGM_START_DATE || process.env.START_DATE || null,
    endDate: process.env.AGM_END_DATE || process.env.END_DATE || null,
    statsFile: process.env.AGM_STATS_FILE || './cleanup-stats.json'
  };

  // Command line arguments override the environment-derived defaults
  for (const arg of args) {
    if (arg === '--dry-run') {
      config.dryRun = true;
    } else if (arg === '--check-only') {
      config.checkOnly = true;
    } else if (arg.startsWith('--start-year=')) {
      config.startYear = parseInt(argValue(arg), 10);
      config.specificYear = null; // Clear specific year if start year is provided
    } else if (arg.startsWith('--end-year=')) {
      config.endYear = parseInt(argValue(arg), 10);
      config.specificYear = null; // Clear specific year if end year is provided
    } else if (arg.startsWith('--specific-year=')) {
      config.specificYear = parseInt(argValue(arg), 10);
    } else if (arg.startsWith('--start-date=')) {
      config.startDate = argValue(arg);
      config.specificYear = null; // Clear specific year if start date is provided
    } else if (arg.startsWith('--end-date=')) {
      config.endDate = argValue(arg);
      config.specificYear = null; // Clear specific year if end date is provided
    } else if (arg.startsWith('--batch-size=')) {
      config.batchSize = parseInt(argValue(arg), 10);
    } else if (arg.startsWith('--counting-strategy=')) {
      config.countingStrategy = argValue(arg);
    }
  }

  return config;
}
|
|
|
|
// Parse configuration once from environment variables and command line arguments
const CONFIG = parseArguments();

// Flat module-level aliases kept for backward compatibility with the rest
// of this script, which refers to these names directly.
const {
  dryRun: DRY_RUN,
  batchSize: BATCH_SIZE,
  maxRetries: MAX_RETRIES,
  retryDelay: RETRY_DELAY_MS,
  showProgress: SHOW_PROGRESS,
  silent: SILENT,
  checkOnly: CHECK_ONLY,
  timePeriod: TIME_PERIOD,
  countingStrategy: COUNTING_STRATEGY,
  startYear: START_YEAR,
  endYear: END_YEAR,
  specificYear: SPECIFIC_YEAR,
  startDate: START_DATE,
  endDate: END_DATE,
  statsFile: STATS_FILE
} = CONFIG;
|
|
|
|
/**
 * Create ObjectId from date for filtering.
 *
 * The returned ObjectId embeds the date's unix timestamp (seconds) in its
 * first 4 bytes and zeroes the remaining 8 bytes, so it sorts at-or-before
 * every real ObjectId generated at that moment - suitable for `_id` range
 * filters.
 *
 * @param {Date|string} date - Date to convert to ObjectId
 * @returns {mongoose.Types.ObjectId} ObjectId with the date's timestamp
 */
function createObjectIdFromDate(date) {
  const dateObj = new Date(date);
  const timestamp = Math.floor(dateObj.getTime() / 1000);
  // padStart keeps the hex timestamp at exactly 8 digits. Without it, dates
  // before 2004 (timestamp < 0x10000000) produce a 7-digit prefix and hence
  // a 23-character string, which is not a valid 24-char ObjectId hex.
  const objectIdHex = timestamp.toString(16).padStart(8, '0') + '0000000000000000';
  return new mongoose.Types.ObjectId(objectIdHex);
}
|
|
|
|
/**
 * Quick estimation of document count using sampling (alternative to countDocuments).
 * This provides a rough estimate without scanning billions of records.
 *
 * @param {Object} periodFilter - MongoDB filter for the time period
 * @param {string} timePeriodName - Name of the time period for logging
 * @returns {Promise<number>} Estimated document count, or -1 when estimation
 *   is not possible (caller falls back to progressive counting)
 */
async function estimateDocumentCount(periodFilter, timePeriodName) {
  try {
    // Sample a small number of documents to estimate density.
    // Sorting by _id guarantees the first/last sampled documents actually
    // bound the sampled time range (unsorted find() returns natural order,
    // which is not guaranteed to follow insertion time).
    const sampleSize = 1000;
    const sampleDocs = await AppDetail.find(periodFilter)
      .select('_id')
      .sort({ _id: 1 })
      .limit(sampleSize)
      .lean();

    if (sampleDocs.length === 0) return 0;
    // Fewer docs than the sample size means we saw the whole period
    if (sampleDocs.length < sampleSize) return sampleDocs.length;

    // Use collection stats for rough estimation.
    // NOTE(review): collection.stats() is deprecated in newer MongoDB
    // drivers - confirm driver version before upgrading.
    const collStats = await mongoose.connection.db.collection('application_details').stats();
    const totalDocs = collStats.count || collStats.size || 0;

    // Estimate based on the ObjectId time range covered by the sample
    const startId = sampleDocs[0]._id;
    const endId = sampleDocs[sampleDocs.length - 1]._id;
    const timeRangeMs = endId.getTimestamp().getTime() - startId.getTimestamp().getTime();
    const totalTimeSpanMs = Date.now() - new Date('2020-01-01').getTime(); // Rough total span

    // Guard against a zero/negative range (e.g. all sampled docs created in
    // the same second) which would make the ratio meaningless
    if (timeRangeMs <= 0 || totalTimeSpanMs <= 0) {
      debug(`Sample time range too small for ${timePeriodName}, falling back to progressive counting`);
      return -1;
    }

    const estimatedCount = Math.floor((totalDocs * timeRangeMs) / totalTimeSpanMs);
    debug(`${timePeriodName} estimated count: ~${estimatedCount.toLocaleString()} (sample-based)`);

    return estimatedCount;
  } catch (error) {
    debug(`Estimation failed for ${timePeriodName}: ${error.message}, falling back to progressive counting`);
    return -1; // Indicates estimation failed
  }
}
|
|
|
|
/**
 * Parse a date string into a Date object with validation.
 *
 * Strings containing 'T' or 'Z' are treated as ISO datetimes and parsed
 * as-is; bare YYYY-MM-DD strings are interpreted as start of day UTC.
 *
 * @param {string} dateString - Date string in YYYY-MM-DD or ISO format
 * @param {string} paramName - Parameter name used in the error message
 * @returns {Date|null} Parsed date, or null when no string was provided
 * @throws {Error} When the string cannot be parsed as a valid date
 */
function parseDate(dateString, paramName) {
  if (!dateString) return null;

  const looksLikeIso = dateString.includes('T') || dateString.includes('Z');
  const parsed = looksLikeIso
    ? new Date(dateString)
    : new Date(`${dateString}T00:00:00.000Z`); // YYYY-MM-DD -> midnight UTC

  if (Number.isNaN(parsed.getTime())) {
    throw new Error(`Invalid date format for ${paramName}: ${dateString}. Use YYYY-MM-DD or ISO format.`);
  }

  return parsed;
}
|
|
|
|
/**
 * Build one period descriptor.
 * @param {string} name - Display name used in logging and statistics
 * @param {Date} startDate - Inclusive period start
 * @param {Date} endDate - Exclusive period end
 * @returns {{name: string, startDate: Date, endDate: Date}}
 */
function makePeriod(name, startDate, endDate) {
  return { name, startDate, endDate };
}

/**
 * Generate the 12 monthly periods of a calendar year using UTC boundaries.
 *
 * UTC is used for BOTH ends of each period. Previously the end boundary was
 * built with `new Date(year, month + 1, 1)` (host-local time) while the
 * start was UTC, which created gaps or overlaps of up to a day at month
 * edges on non-UTC hosts - records there could be skipped or processed
 * twice. The label is also rendered with timeZone: 'UTC' so it cannot show
 * the previous month on hosts west of UTC.
 *
 * @param {number} year - Calendar year, e.g. 2024
 * @returns {Array<{name: string, startDate: Date, endDate: Date}>}
 */
function monthlyPeriodsForYear(year) {
  const months = [];
  for (let month = 0; month < 12; month++) {
    const startDate = new Date(Date.UTC(year, month, 1));
    const endDate = new Date(Date.UTC(year, month + 1, 1)); // first day of next month (exclusive)
    const label = `${year}-${String(month + 1).padStart(2, '0')}`;
    const pretty = startDate.toLocaleDateString('en-US', { month: 'long', year: 'numeric', timeZone: 'UTC' });
    months.push(makePeriod(`${label} (${pretty})`, startDate, endDate));
  }
  return months;
}

/**
 * Split an arbitrary [startDate, endDate) range into processing periods:
 * one period for spans up to 31 days, monthly periods up to 365 days, and
 * yearly periods beyond that. All boundaries are computed in UTC.
 *
 * @param {Date} startDate - Inclusive range start
 * @param {Date} endDate - Exclusive range end
 * @returns {Array<{name: string, startDate: Date, endDate: Date}>}
 * @throws {Error} When endDate is not strictly after startDate
 */
function periodsForDateRange(startDate, endDate) {
  if (endDate <= startDate) {
    throw new Error(`End date (${endDate.toISOString()}) must be after start date (${startDate.toISOString()})`);
  }

  const isoDay = (d) => d.toISOString().split('T')[0];
  const daysDiff = Math.ceil((endDate - startDate) / (1000 * 60 * 60 * 24));

  if (daysDiff <= 31) {
    // Single period for ranges up to one month
    return [makePeriod(`Custom Range: ${isoDay(startDate)} to ${isoDay(endDate)}`, startDate, endDate)];
  }

  // Step to the next UTC month boundary (<= 1 year) or year boundary (> 1 year)
  const nextBoundary = daysDiff <= 365
    ? (d) => new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth() + 1, 1))
    : (d) => new Date(Date.UTC(d.getUTCFullYear() + 1, 0, 1));

  const periods = [];
  let current = new Date(startDate);
  let periodCount = 1;

  while (current < endDate) {
    const periodEnd = new Date(Math.min(nextBoundary(current).getTime(), endDate.getTime()));
    periods.push(makePeriod(
      `Period ${periodCount}: ${isoDay(current)} to ${isoDay(periodEnd)}`,
      new Date(current),
      periodEnd
    ));
    current = nextBoundary(current);
    periodCount++;
  }

  return periods;
}

/**
 * Generate time periods to process based on configuration.
 *
 * Priority: explicit START_DATE/END_DATE range (highest), then
 * SPECIFIC_YEAR, then the START_YEAR..END_YEAR span - processed monthly for
 * 2020 and later (likely large datasets) and as whole years before 2020.
 *
 * @returns {Array<{name: string, startDate: Date, endDate: Date}>}
 */
function generateTimePeriods() {
  // Explicit date range has the highest priority
  if (START_DATE || END_DATE) {
    const startDate = START_DATE ? parseDate(START_DATE, 'START_DATE') : new Date('2020-01-01T00:00:00.000Z');
    const endDate = END_DATE ? parseDate(END_DATE, 'END_DATE') : new Date(); // now, when not specified
    return periodsForDateRange(startDate, endDate);
  }

  if (SPECIFIC_YEAR) {
    // Monthly periods for better memory management and progress tracking
    debug(`Processing specific year ${SPECIFIC_YEAR} in monthly periods`);
    return monthlyPeriodsForYear(SPECIFIC_YEAR);
  }

  const periods = [];
  for (let year = START_YEAR; year <= END_YEAR; year++) {
    if (year >= 2020) {
      // Recent years likely hold large datasets - split into months
      debug(`Processing year ${year} in monthly periods (recent year with potentially large dataset)`);
      periods.push(...monthlyPeriodsForYear(year));
    } else {
      // Older years (pre-2020) likely hold less data - process as full years
      periods.push(makePeriod(
        `Year ${year}`,
        new Date(Date.UTC(year, 0, 1)),
        new Date(Date.UTC(year + 1, 0, 1))
      ));
    }
  }

  return periods;
}
|
|
|
|
/**
 * Write statistics to JSON file (append mode to preserve history).
 *
 * Writes are rate-limited to one every 30 seconds (tracked on the function
 * object via `writeStatsToFile.lastWrite`) unless `force` is set, so tight
 * processing loops do not hammer the filesystem. Up to 50 sessions are
 * retained; a 'started' phase opens a new session, any other phase updates
 * the most recent one. All errors are logged and swallowed - statistics
 * output must never crash the cleanup run.
 *
 * @param {Object} stats - Statistics object to write
 * @param {string} phase - Current phase (e.g. 'started', 'period', 'final')
 * @param {string} periodName - Name of current period (optional)
 * @param {boolean} force - Force write even if not enough time has passed
 */
async function writeStatsToFile(stats, phase = 'update', periodName = null, force = false) {
  try {
    // Rate limit statistics writing - only write every 30 seconds unless forced
    const now = Date.now();
    if (!force && writeStatsToFile.lastWrite && (now - writeStatsToFile.lastWrite) < 30000) {
      return;
    }
    writeStatsToFile.lastWrite = now;

    // Read existing statistics file to preserve history. Reading directly
    // (instead of a separate fs.access() existence pre-check) avoids a
    // race between the check and the read; a missing file (ENOENT) simply
    // means we start fresh.
    let existingData = { sessions: [] };
    try {
      const existingContent = await fs.readFile(STATS_FILE, 'utf8');
      if (existingContent.trim()) {
        existingData = JSON.parse(existingContent);
        // Ensure sessions array exists
        if (!existingData.sessions) {
          existingData.sessions = [];
        }
      }
    } catch (readError) {
      if (readError.code !== 'ENOENT') {
        debug(`Warning: Could not parse existing stats file, starting fresh: ${readError.message}`);
      }
      existingData = { sessions: [] };
    }

    // Snapshot of this session's progress plus the configuration that produced it
    const sessionEntry = {
      sessionId: stats.sessionId || `session_${Date.now()}`,
      timestamp: new Date().toISOString(),
      phase: phase,
      currentPeriod: periodName,
      sessionSummary: {
        totalDeleted: stats.deleted,
        totalOrphaned: stats.totalOrphaned,
        periodsProcessed: stats.periodsProcessed,
        periodsTotal: stats.periodsTotal,
        errors: stats.errors,
        sessionStartTime: stats.startTime || new Date().toISOString()
      },
      allPeriods: stats.periodResults,
      configuration: {
        dryRun: DRY_RUN,
        checkOnly: CHECK_ONLY,
        batchSize: BATCH_SIZE,
        countingStrategy: COUNTING_STRATEGY,
        specificYear: SPECIFIC_YEAR,
        startYear: START_YEAR,
        endYear: END_YEAR,
        startDate: START_DATE,
        endDate: END_DATE
      }
    };

    // 'started' always opens a new session; other phases update the latest
    // session, creating one only when none exists yet
    if (phase === 'started' || existingData.sessions.length === 0) {
      existingData.sessions.push(sessionEntry);
    } else {
      const currentSession = existingData.sessions[existingData.sessions.length - 1];
      Object.assign(currentSession, sessionEntry);
    }

    // Keep only the last 50 sessions to prevent file from growing too large
    if (existingData.sessions.length > 50) {
      existingData.sessions = existingData.sessions.slice(-50);
    }

    // Add metadata
    existingData.lastUpdated = new Date().toISOString();
    existingData.totalSessions = existingData.sessions.length;

    await fs.writeFile(STATS_FILE, JSON.stringify(existingData, null, 2), 'utf8');
    debug(`Statistics appended to ${STATS_FILE} (session ${sessionEntry.sessionId})`);
  } catch (error) {
    debug(`Error writing statistics to file: ${error.message}`);
  }
}
|
|
|
|
/**
 * Process orphaned records immediately (clean after each period).
 *
 * In CHECK_ONLY mode the records are only counted; otherwise a few samples
 * are logged for spot-checking and the records are deleted in fixed-size
 * batches, pausing briefly every 10 batches to reduce database load.
 *
 * @param {Array} orphanedRecords - Array of orphaned records for this period
 * @param {Object} stats - Statistics object to update
 * @param {string} periodName - Name of the current period
 * @returns {Promise<void>}
 */
async function processOrphanedRecordsImmediately(orphanedRecords, stats, periodName) {
  if (orphanedRecords.length === 0) {
    debug(`No orphaned records to process for ${periodName}`);
    return;
  }

  debug(`Processing ${orphanedRecords.length} orphaned records for ${periodName}...`);

  if (CHECK_ONLY) {
    debug(`CHECK_ONLY mode: Found ${orphanedRecords.length} orphaned records in ${periodName}`);
    stats.processed += orphanedRecords.length;
    stats.dryRunCount += orphanedRecords.length;
    stats.totalOrphaned += orphanedRecords.length;
    return;
  }

  // Log a few examples so operators can spot-check what is being removed
  const samples = await getSampleOrphanedRecords(orphanedRecords, 3);
  if (samples.length > 0) {
    debug(`Sample orphaned records from ${periodName}:`);
    samples.forEach((record, index) => {
      const createdDate = record._id.getTimestamp().toISOString();
      debug(` ${index + 1}. ID: ${record._id}, FileID: ${record.fileId}, Created: ${createdDate}`);
    });
  }

  // Delete in fixed-size chunks so a huge period cannot exhaust memory
  const deletionBatches = utils.chunkArray(orphanedRecords, BATCH_SIZE);
  debug(`Processing ${deletionBatches.length} deletion batches for ${periodName}`);

  for (const deletionBatch of deletionBatches) {
    try {
      stats.batches++;

      await deleteBatch(deletionBatch, stats);

      // Brief pause every 10 batches to reduce database load
      if (stats.batches % 10 === 0) {
        await sleep(100);
      }
    } catch (error) {
      debug(`Error processing deletion batch ${stats.batches} for ${periodName}: ${error.message}`);
      stats.errors++;

      // Abort this period once the CUMULATIVE error count (across the whole
      // stats object, not consecutive failures) gets too high
      if (stats.errors > 8) {
        debug('Too many deletion errors, stopping batch processing');
        break;
      }
    }
  }

  stats.totalOrphaned += orphanedRecords.length;
  debug(`Completed processing ${orphanedRecords.length} orphaned records for ${periodName}`);
}
|
|
|
|
/**
 * Pause execution for the given number of milliseconds.
 * @param {number} ms - Milliseconds to wait
 * @returns {Promise<void>} Resolves once the delay has elapsed
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Retry wrapper for operations with exponential backoff.
 *
 * Runs `operation` up to `maxRetries + 1` times, doubling the delay between
 * attempts (RETRY_DELAY_MS * 2^attempt). The last error is rethrown once
 * all attempts are exhausted.
 *
 * @param {Function} operation - Async operation to retry
 * @param {string} operationName - Name of the operation for logging
 * @param {number} maxRetries - Maximum number of retries
 * @returns {Promise<any>} Result of the operation
 */
async function withRetry(operation, operationName, maxRetries = MAX_RETRIES) {
  let attempt = 0;
  let lastError;

  while (attempt <= maxRetries) {
    try {
      return await operation();
    } catch (error) {
      lastError = error;

      const isFinalAttempt = attempt === maxRetries;
      if (isFinalAttempt) {
        debug(`${operationName} failed after ${maxRetries + 1} attempts: ${error.message}`);
        throw error;
      }

      const backoffMs = RETRY_DELAY_MS * Math.pow(2, attempt);
      debug(`${operationName} failed (attempt ${attempt + 1}/${maxRetries + 1}): ${error.message}. Retrying in ${backoffMs}ms...`);
      await sleep(backoffMs);
      attempt++;
    }
  }

  // Unreachable when maxRetries >= 0 (the loop returns or throws),
  // kept as a defensive fallback
  throw lastError;
}
|
|
|
|
/**
 * Load all existing AppFile IDs into memory for fast lookup.
 *
 * Only files not marked for deletion (`markedDelete !== true`) are loaded;
 * the whole fetch is wrapped in withRetry for resilience.
 *
 * @returns {Promise<Set>} Set of all existing AppFile _id strings
 */
async function loadAppFileIds() {
  debug('Loading all AppFile IDs into memory...');

  return await withRetry(async () => {
    const files = await AppFile.find({ markedDelete: { $ne: true } }, { _id: 1 }).lean();
    const ids = new Set();
    for (const file of files) {
      ids.add(file._id.toString());
    }
    debug(`Loaded ${ids.size} AppFile IDs into memory cache`);
    return ids;
  }, 'Load AppFile IDs');
}
|
|
|
|
/**
|
|
* Find orphaned application details by checking against in-memory cache for a specific time period
|
|
* This approach loads all AppFile IDs into memory first, then checks each application detail within the time range
|
|
* Uses cursor-based pagination instead of skip() for better performance on large datasets
|
|
* @param {Object} timePeriod - Time period object with startDate and endDate
|
|
* @param {Set} existingFileIds - Set of existing AppFile IDs for lookup
|
|
* @returns {Promise<Array>} Array of orphaned application detail documents
|
|
*/
|
|
async function findOrphanedAppDetailsForPeriod(timePeriod, existingFileIds) {
|
|
const periodStartTime = new Date();
|
|
debug(`Finding orphaned application details for ${timePeriod.name}...`);
|
|
|
|
// Create ObjectId filters for the time period
|
|
const startObjectId = createObjectIdFromDate(timePeriod.startDate);
|
|
const endObjectId = createObjectIdFromDate(timePeriod.endDate);
|
|
|
|
debug(`Period: ${timePeriod.startDate.toISOString()} to ${timePeriod.endDate.toISOString()}`);
|
|
debug(`ObjectId range: ${startObjectId} to ${endObjectId}`);
|
|
|
|
// Get total count of application details for this period
|
|
const periodFilter = {
|
|
_id: {
|
|
$gte: startObjectId,
|
|
$lt: endObjectId
|
|
}
|
|
};
|
|
|
|
// Handle counting strategy: skip, estimate, or full
|
|
let totalAppDetails = -1; // -1 indicates progressive counting
|
|
|
|
if (COUNTING_STRATEGY === 'full') {
|
|
// Original expensive approach - scan all documents
|
|
totalAppDetails = await withRetry(async () => {
|
|
return await AppDetail.countDocuments(periodFilter);
|
|
}, `Count application details for ${timePeriod.name}`);
|
|
|
|
debug(`Checking ${totalAppDetails} application details in ${timePeriod.name} against ${existingFileIds.size} existing file IDs`);
|
|
|
|
if (totalAppDetails === 0) {
|
|
debug(`No application details found for ${timePeriod.name}`);
|
|
return { found: 0, processed: 0 };
|
|
}
|
|
} else if (COUNTING_STRATEGY === 'estimate') {
|
|
// Use estimation for approximate progress tracking
|
|
totalAppDetails = await estimateDocumentCount(periodFilter, timePeriod.name);
|
|
|
|
if (totalAppDetails === 0) {
|
|
debug(`No application details found for ${timePeriod.name} - skipping`);
|
|
return { found: 0, processed: 0 };
|
|
} else if (totalAppDetails > 0) {
|
|
debug(`Estimated ${totalAppDetails.toLocaleString()} application details in ${timePeriod.name} (checking against ${existingFileIds.size} existing file IDs)`);
|
|
} else {
|
|
// Estimation failed, fall back to progressive counting
|
|
debug(`Estimation failed for ${timePeriod.name}, using progressive counting`);
|
|
totalAppDetails = -1;
|
|
}
|
|
} else {
|
|
// Default: skip counting entirely - use progressive counting
|
|
debug(`Scanning ${timePeriod.name} for orphaned application details (progressive counting enabled)`);
|
|
debug(`Period filter: _id >= ${startObjectId} and _id < ${endObjectId}`);
|
|
debug(`Will check against ${existingFileIds.size} existing file IDs`);
|
|
|
|
// Quick check if period has any data by fetching just one document
|
|
const hasData = await withRetry(async () => {
|
|
const sample = await AppDetail.findOne(periodFilter).select('_id').lean();
|
|
return !!sample;
|
|
}, `Check if ${timePeriod.name} has data`);
|
|
|
|
if (!hasData) {
|
|
debug(`No application details found for ${timePeriod.name} - skipping`);
|
|
return { found: 0, processed: 0 };
|
|
}
|
|
}
|
|
|
|
// Use streaming approach - process in chunks without accumulating all orphaned records
|
|
let processed = 0;
|
|
let totalOrphaned = 0;
|
|
let lastId = startObjectId;
|
|
const checkBatchSize = Math.min(BATCH_SIZE, 5000); // Use smaller batches for memory checking
|
|
|
|
// Adaptive progress reporting - less frequent for large datasets
|
|
const progressInterval = existingFileIds.size > 1000000 ? checkBatchSize * 50 : checkBatchSize * 10;
|
|
|
|
while (true) {
|
|
// Use cursor-based pagination instead of skip() for better performance
|
|
const cursorFilter = {
|
|
_id: {
|
|
// Use $gt if we have processed records (lastId), otherwise start from beginning with $gte
|
|
[lastId.equals(startObjectId) ? '$gte' : '$gt']: lastId,
|
|
$lt: endObjectId
|
|
}
|
|
};
|
|
|
|
// Fetch batch of application details for this time period
|
|
const appDetailsBatch = await withRetry(async () => {
|
|
return await AppDetail.find(cursorFilter)
|
|
.select('_id fileId')
|
|
.sort({ _id: 1 })
|
|
.limit(checkBatchSize)
|
|
.lean();
|
|
}, `Fetch application details batch for ${timePeriod.name} (lastId: ${lastId})`);
|
|
|
|
if (appDetailsBatch.length === 0) {
|
|
break; // No more records
|
|
}
|
|
|
|
// Process this batch immediately - find orphaned records and process them
|
|
const orphanedBatch = [];
|
|
for (const appDetail of appDetailsBatch) {
|
|
// Skip records with missing or null fileId - these are legacy data
|
|
if (!appDetail.fileId) {
|
|
// Skip this record, probably used appId originally, don't count it as orphaned
|
|
processed++;
|
|
continue;
|
|
}
|
|
|
|
if (!existingFileIds.has(appDetail.fileId.toString())) {
|
|
orphanedBatch.push({
|
|
_id: appDetail._id,
|
|
fileId: appDetail.fileId
|
|
});
|
|
}
|
|
processed++;
|
|
}
|
|
|
|
// Process orphaned records from this batch immediately to avoid memory accumulation
|
|
if (orphanedBatch.length > 0) {
|
|
totalOrphaned += orphanedBatch.length;
|
|
|
|
// Process immediately if not in CHECK_ONLY mode
|
|
if (!CHECK_ONLY) {
|
|
await processOrphanedRecordsImmediately(orphanedBatch, {
|
|
deleted: 0,
|
|
errors: 0,
|
|
batches: 0
|
|
}, timePeriod.name);
|
|
}
|
|
|
|
debug(`Processed ${orphanedBatch.length} orphaned records from batch (Total orphaned so far: ${totalOrphaned})`);
|
|
}
|
|
|
|
// Update lastId for cursor-based pagination
|
|
if (appDetailsBatch.length > 0) {
|
|
// Set lastId to the last document's _id from this batch
|
|
const lastDoc = appDetailsBatch[appDetailsBatch.length - 1];
|
|
lastId = lastDoc._id;
|
|
|
|
// For next iteration, we'll use $gt instead of $gte to avoid duplicates
|
|
// So we need to slightly modify the cursor filter
|
|
}
|
|
|
|
// Show progress for the checking phase, unless silent mode is enabled and no orphans found
|
|
// Less frequent progress reporting for large datasets
|
|
if (!SILENT && SHOW_PROGRESS && processed % progressInterval === 0) {
|
|
const elapsedSeconds = (Date.now() - periodStartTime.getTime()) / 1000;
|
|
const rate = processed / (elapsedSeconds || 1);
|
|
|
|
if (totalAppDetails > 0) {
|
|
// Show percentage progress when we have total count (estimate or full)
|
|
const percentage = ((processed / totalAppDetails) * 100).toFixed(1);
|
|
debug(`${timePeriod.name} progress: ${processed}/${totalAppDetails} (${percentage}%) ${totalAppDetails > 1000000 ? '[estimated]' : ''} - Found ${totalOrphaned} orphaned so far`);
|
|
} else {
|
|
// Progressive counting mode - show rate only
|
|
debug(`${timePeriod.name} progress: ${processed} processed (${rate.toFixed(1)} records/sec) - Found ${totalOrphaned} orphaned so far`);
|
|
}
|
|
}
|
|
|
|
// Break if we've processed fewer records than requested (end of collection)
|
|
if (appDetailsBatch.length < checkBatchSize) {
|
|
break;
|
|
}
|
|
|
|
// Small delay to prevent overwhelming the database - longer for large datasets
|
|
const delayMs = existingFileIds.size > 1000000 ? 50 : 10;
|
|
await sleep(delayMs);
|
|
|
|
// Force garbage collection every 100 batches to prevent memory buildup
|
|
if (processed % (checkBatchSize * 100) === 0) {
|
|
if (global.gc) {
|
|
global.gc();
|
|
debug(`Forced garbage collection at ${processed} records processed`);
|
|
}
|
|
}
|
|
}
|
|
|
|
debug(`Completed checking ${processed} application details for ${timePeriod.name} - Found ${totalOrphaned} orphaned records`);
|
|
return { found: totalOrphaned, processed: totalOrphaned };
|
|
}
|
|
|
|
/**
 * Get a sample of orphaned records for reporting purposes.
 *
 * Takes the first `sampleSize` entries of the orphaned list and re-fetches
 * them from the AppDetail collection so the report can include location
 * fields (lat/lon) alongside the ids.
 *
 * @param {Array} orphanedIds - Array of orphaned documents ({ _id, fileId })
 * @param {number} sampleSize - Number of sample records to retrieve
 * @returns {Promise<Array>} Sample of orphaned records (lean documents)
 */
async function getSampleOrphanedRecords(orphanedIds, sampleSize = 5) {
  if (orphanedIds.length === 0) {
    return [];
  }

  // Collect up to sampleSize ids from the front of the orphaned list
  const idsToFetch = [];
  for (const entry of orphanedIds) {
    if (idsToFetch.length >= sampleSize) {
      break;
    }
    idsToFetch.push(entry._id);
  }

  // Fetch the sample documents with retry protection
  const fetchSample = async () =>
    AppDetail.find({ _id: { $in: idsToFetch } })
      .select('_id fileId lat lon')
      .lean();

  return withRetry(fetchSample, 'Get sample orphaned records');
}
|
|
|
|
/**
 * Delete a batch of orphaned application details.
 *
 * In DRY_RUN / CHECK_ONLY mode nothing is removed from the database; the
 * batch is only counted. Otherwise a bulk deleteMany is issued with retry
 * protection.
 *
 * @param {Array} batch - Batch of documents ({ _id, ... }) to delete
 * @param {Object} stats - Statistics object to update (processed, deleted, dryRunCount)
 * @returns {Promise<void>}
 */
async function deleteBatch(batch, stats) {
  // Nothing to do for a missing or empty batch
  if (!batch?.length) {
    return;
  }

  const idsToDelete = batch.map((doc) => doc._id);
  debug(`Processing batch of ${idsToDelete.length} orphaned records...`);

  // Simulation modes: count the batch but leave the database untouched
  if (DRY_RUN || CHECK_ONLY) {
    const modeLabel = DRY_RUN ? 'DRY RUN' : 'CHECK ONLY';
    debug(`${modeLabel}: Would delete ${idsToDelete.length} orphaned application details`);
    stats.processed += idsToDelete.length;
    stats.dryRunCount += idsToDelete.length;
    return;
  }

  // Real mode: bulk delete with retry protection
  const result = await withRetry(
    async () => AppDetail.deleteMany({ _id: { $in: idsToDelete } }),
    `Delete batch of ${idsToDelete.length} orphaned records`
  );

  // Record what was attempted vs. what the database confirmed deleted
  stats.processed += idsToDelete.length;
  stats.deleted += result.deletedCount || 0;

  debug(`Batch completed: ${result.deletedCount} records deleted`);
}
|
|
|
|
/**
 * Display progress information.
 *
 * Logs processed/total counts, completion percentage, processing rate and
 * an ETA. No-op when progress reporting is disabled or the total is zero.
 *
 * @param {number} processed - Number of documents processed
 * @param {number} total - Total number of documents
 * @param {Date} startTime - Start time of the operation
 */
function showProgress(processed, total, startTime) {
  if (!SHOW_PROGRESS || total === 0) return;

  const elapsed = Date.now() - startTime.getTime();
  const rate = processed / (elapsed / 1000);
  const remaining = total - processed;
  // FIX: guard against a non-positive rate (e.g. called before any record
  // was processed). Previously `remaining / rate` with rate === 0 yielded
  // Infinity, and formatTime then printed "Infinityh NaNm NaNs" for the ETA
  // (Infinity % 3600 is NaN).
  const eta = remaining > 0 && rate > 0 ? remaining / rate : 0;
  const percentage = ((processed / total) * 100).toFixed(1);

  // Render a seconds value as "Xh Ym Zs", omitting leading zero units
  const formatTime = (seconds) => {
    const hours = Math.floor(seconds / 3600);
    const minutes = Math.floor((seconds % 3600) / 60);
    const secs = Math.floor(seconds % 60);

    if (hours > 0) {
      return `${hours}h ${minutes}m ${secs}s`;
    } else if (minutes > 0) {
      return `${minutes}m ${secs}s`;
    } else {
      return `${secs}s`;
    }
  };

  debug(`Progress: ${processed}/${total} (${percentage}%) | Rate: ${rate.toFixed(1)} records/sec | ETA: ${formatTime(eta)}`);
}
|
|
|
|
/**
 * Main function to clean orphaned application details.
 *
 * Loads all AppFile IDs into memory once, then walks each configured time
 * period, streaming its application details and removing (or just counting,
 * in DRY_RUN / CHECK_ONLY mode) records whose fileId no longer exists.
 * Running statistics are flushed to the stats file after every period so
 * progress survives a crash. Processing aborts after more than 3 period
 * errors.
 *
 * @returns {Promise<Object>} Statistics about the cleanup operation
 * @throws {Error} Rethrows any fatal (non-period) error after logging it
 */
async function cleanOrphanedAppDetails() {
  const startTime = new Date();
  debug(`Starting orphaned application details cleanup...`);
  debug(`Configuration:`);
  debug(` - DRY_RUN: ${DRY_RUN}`);
  debug(` - CHECK_ONLY: ${CHECK_ONLY}`);
  debug(` - BATCH_SIZE: ${BATCH_SIZE}`);
  debug(` - TIME_PERIOD: ${TIME_PERIOD}`);
  debug(` - COUNTING_STRATEGY: ${COUNTING_STRATEGY} (skip=fastest, estimate=approximate, full=slow)`);
  debug(` - SPECIFIC_YEAR: ${SPECIFIC_YEAR || 'none'}`);
  debug(` - START_YEAR: ${START_YEAR}`);
  debug(` - END_YEAR: ${END_YEAR}`);
  debug(` - START_DATE: ${START_DATE || 'none'}`);
  debug(` - END_DATE: ${END_DATE || 'none'}`);
  debug(` - Date range mode: ${START_DATE || END_DATE ? 'ENABLED' : 'DISABLED'}`);
  debug(` - Years to process: ${SPECIFIC_YEAR || (START_DATE || END_DATE ? 'custom date range' : `${START_YEAR}-${END_YEAR}`)}`);
  debug(` - Command line args: ${process.argv.slice(2).join(' ') || 'none'}`);

  // Debug environment variable sources
  debug('Environment Variable Sources:');
  debug(` - process.env.AGM_START_YEAR: "${process.env.AGM_START_YEAR || 'undefined'}"`);
  debug(` - process.env.START_YEAR: "${process.env.START_YEAR || 'undefined'}"`);
  debug(` - process.env.AGM_END_YEAR: "${process.env.AGM_END_YEAR || 'undefined'}"`);
  debug(` - process.env.END_YEAR: "${process.env.END_YEAR || 'undefined'}"`);
  debug(` - process.env.AGM_START_DATE: "${process.env.AGM_START_DATE || 'undefined'}"`);
  debug(` - process.env.START_DATE: "${process.env.START_DATE || 'undefined'}"`);
  debug(` - process.env.AGM_END_DATE: "${process.env.AGM_END_DATE || 'undefined'}"`);
  debug(` - process.env.END_DATE: "${process.env.END_DATE || 'undefined'}"`);
  debug(` - process.env.AGM_SPECIFIC_YEAR: "${process.env.AGM_SPECIFIC_YEAR || 'undefined'}"`);
  debug(` - process.env.SPECIFIC_YEAR: "${process.env.SPECIFIC_YEAR || 'undefined'}"`);

  // Initialize statistics
  const stats = {
    sessionId: `session_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
    processed: 0,
    deleted: 0,
    errors: 0,
    dryRunCount: 0,
    totalOrphaned: 0,
    batches: 0,
    periodsProcessed: 0,
    periodsTotal: 0,
    periodResults: [], // Track results per period
    startTime: new Date().toISOString() // Track session start time
  };

  try {
    // Load all AppFile IDs into memory once at the beginning
    debug('Loading AppFile IDs into memory...');
    const allFileIds = await loadAppFileIds();

    // Generate time periods to process
    const timePeriods = generateTimePeriods();
    stats.periodsTotal = timePeriods.length;

    debug(`Processing ${timePeriods.length} time periods:`);
    timePeriods.forEach(period => {
      debug(` - ${period.name}: ${period.startDate.toISOString().split('T')[0]} to ${period.endDate.toISOString().split('T')[0]}`);
    });

    // Write initial statistics
    await writeStatsToFile(stats, 'started', null, true);

    for (const timePeriod of timePeriods) {
      const periodStartTime = new Date();

      try {
        debug(`\n${'='.repeat(60)}`);
        debug(`Processing ${timePeriod.name}...`);
        debug(`${'='.repeat(60)}`);

        // Find orphaned records for this time period (streaming approach)
        const periodResult = await findOrphanedAppDetailsForPeriod(timePeriod, allFileIds);

        // Track period results
        const periodStats = {
          name: timePeriod.name,
          startDate: timePeriod.startDate.toISOString(),
          endDate: timePeriod.endDate.toISOString(),
          orphanedFound: periodResult.found,
          processed: periodResult.processed,
          // FIX: in DRY_RUN / CHECK_ONLY mode nothing is actually removed
          // from the database (deleteBatch only counts), so reporting
          // processed as deleted here was wrong and inconsistent with the
          // global stats update below.
          deleted: (DRY_RUN || CHECK_ONLY) ? 0 : periodResult.processed,
          errors: 0,
          duration: (Date.now() - periodStartTime.getTime()) / 1000
        };

        if (periodResult.found > 0) {
          debug(`Found and processed ${periodResult.found} orphaned records in ${timePeriod.name}`);

          // Update global stats
          stats.totalOrphaned += periodResult.found;
          stats.processed += periodResult.processed;
          if (DRY_RUN || CHECK_ONLY) {
            // FIX: propagate the "would delete" count so the final summary
            // (which prints dryRunCount in these modes) is not always 0;
            // the streaming path never updates the global dryRunCount.
            stats.dryRunCount += periodResult.processed;
          } else {
            stats.deleted += periodResult.processed;
          }
        } else {
          debug(`No orphaned records found in ${timePeriod.name}`);
        }

        stats.periodResults.push(periodStats);
        stats.periodsProcessed++;

        // Write updated statistics after each period
        await writeStatsToFile(stats, 'period-completed', timePeriod.name, true);

        debug(`Period ${timePeriod.name} completed in ${periodStats.duration.toFixed(2)}s`);

      } catch (error) {
        debug(`Error processing ${timePeriod.name}: ${error.message}`);
        stats.errors++;

        // Track failed period
        const periodResult = {
          name: timePeriod.name,
          startDate: timePeriod.startDate.toISOString(),
          endDate: timePeriod.endDate.toISOString(),
          orphanedFound: 0,
          processed: 0,
          deleted: 0,
          errors: 1,
          duration: (Date.now() - periodStartTime.getTime()) / 1000,
          error: error.message
        };
        stats.periodResults.push(periodResult);

        // Continue with next period unless too many errors
        if (stats.errors > 3) {
          debug('Too many period errors, stopping operation');
          break;
        }
      }
    }

    // Early exit when the database is already clean
    if (stats.totalOrphaned === 0) {
      debug('\nNo orphaned application details found across all time periods. Database is clean!');
      await writeStatsToFile(stats, 'completed-clean', null, true);
      return stats;
    }

    const endTime = new Date();
    const duration = (endTime.getTime() - startTime.getTime()) / 1000;

    // Final summary
    debug('\nCleanup operation completed!');
    debug('='.repeat(50));
    debug(`Time periods processed: ${stats.periodsProcessed}/${stats.periodsTotal}`);
    debug(`Total orphaned records found: ${stats.totalOrphaned}`);
    debug(`Total deletion batches processed: ${stats.batches}`);
    debug(`Records processed: ${stats.processed}`);

    if (DRY_RUN) {
      debug(`Dry run count: ${stats.dryRunCount}`);
    } else if (CHECK_ONLY) {
      debug(`Check only count: ${stats.dryRunCount}`);
    } else {
      debug(`Records deleted: ${stats.deleted}`);
    }

    debug(`Errors encountered: ${stats.errors}`);
    debug(`Duration: ${duration.toFixed(2)} seconds`);
    debug(`Average rate: ${stats.processed > 0 ? (stats.processed / duration).toFixed(1) : 0} records/second`);
    debug(`Statistics written to: ${STATS_FILE}`);
    debug('='.repeat(50));

    // Write final statistics
    stats.totalDuration = duration;
    stats.completedAt = endTime.toISOString();
    await writeStatsToFile(stats, 'completed', null, true);

    return stats;

  } catch (error) {
    debug(`Fatal error during cleanup operation: ${error.message}`);
    throw error;
  }
}
|
|
|
|
/**
 * Set up global process error handling.
 *
 * Any uncaught exception or unhandled promise rejection is logged and the
 * process exits with a non-zero status so the worker never lingers in a
 * broken state.
 */
process.on('uncaughtException', (err) => {
  debug('Uncaught Exception:', err);
  process.exit(1);
});

process.on('unhandledRejection', (reason, p) => {
  debug('Unhandled Rejection at Promise:', p, 'reason:', reason);
  process.exit(1);
});
|
|
|
|
/**
 * Main execution - follows the same pattern as other worker scripts.
 *
 * Opens the database connection, runs the cleanup operation, logs the
 * outcome, and always closes the connection and exits in the finally block.
 */
async function main() {
  const connection = new DBConnection('Clean Orphaned App Details Script');

  try {
    await connection.initialize({ setupExitHandlers: false });
    debug('Database connected');

    // Run the cleanup operation
    const runStats = await cleanOrphanedAppDetails();

    // Log final result
    if (runStats.errors > 0) {
      debug(`Operation completed with ${runStats.errors} errors`);
    } else {
      debug('Operation completed successfully');
    }
  } catch (error) {
    debug('Operation failed:', error);
  } finally {
    // Always release the connection and terminate the worker
    await connection.close();
    process.exit();
  }
}

// Execute main function
main();
|