// agmission/Development/shared/translation/translate.js
// 186 lines · 5.9 KiB · JavaScript

const fs = require('fs');
const { TranslationServiceClient } = require('@google-cloud/translate');
const xml2js = require('xml2js');
const debug = require('debug')('agm:translation');
const common = require('./common');
// Google Cloud project id and location, read from the environment. Both are interpolated into
// the `parent` resource path of each translate request; an unset variable therefore ends up as
// the literal text "undefined" in that path.
const GOOGLE_PROJECT_ID = process.env.GOOGLE_PROJECT_ID;
const GOOGLE_LOCATION = process.env.GOOGLE_LOCATION;
// XLF file locations. Each one is passed through common.getAppPath, which is defined in ./common
// and not visible here — presumably it resolves the path against the application root; confirm there.
// Baseline file: lives next to this script under the name common.START_FILE.
const START_FILE_PATH = common.getAppPath(`${__dirname}/${common.START_FILE}`);
// Current file that is diffed against the baseline to find newly added texts.
const CURRENT_FILE_PATH = common.getAppPath(process.env.CURRENT_FILE_PATH);
// Translated files that receive the Spanish ('es') and Portuguese ('pt') results.
const TRANSLATED_FILE_PATH_ES = common.getAppPath(process.env.TRANSLATED_FILE_PATH_ES);
const TRANSLATED_FILE_PATH_PT = common.getAppPath(process.env.TRANSLATED_FILE_PATH_PT);
// Single client instance shared by every translate call. It is constructed without options —
// NOTE(review): credentials are presumably discovered from the environment by the library; confirm.
const translationClient = new TranslationServiceClient();
// Logged through the 'agm:translation' debug namespace.
debug('Starting translation script');
/**
 * Runs the complete translation pass over the Spanish and Portuguese XLF files.
 *
 * 1. Translates the units present in the current file but absent from the start file.
 * 2. Translates whatever is still marked as new in either translated file.
 * 3. Strips blank lines from both translated files.
 *
 * Errors are logged through `debug` and never rethrown; on failure the process exit code is
 * set to 1 so that callers (CI jobs, npm scripts) can detect the failed run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    // Pass 1: texts added between the start file and the current file.
    const { added } = await diffXlfFiles(START_FILE_PATH, CURRENT_FILE_PATH);
    await processTranslation(added, TRANSLATED_FILE_PATH_ES, TRANSLATED_FILE_PATH_PT);

    // Pass 2: units the diff did not pick up. Both translated files are inspected, because a
    // unit can still be new in one language only. Writing is restricted to units that are in
    // state "new" in the file being updated, so the union is safe for both files.
    const leftoverEs = await extractNewTranslations(TRANSLATED_FILE_PATH_ES);
    const leftoverPt = await extractNewTranslations(TRANSLATED_FILE_PATH_PT);
    await processTranslation(
      { ...leftoverEs, ...leftoverPt },
      TRANSLATED_FILE_PATH_ES,
      TRANSLATED_FILE_PATH_PT
    );

    // Remove blank lines from the translated files.
    removeBlankLines(TRANSLATED_FILE_PATH_ES);
    removeBlankLines(TRANSLATED_FILE_PATH_PT);
  } catch (err) {
    debug('Error translating files:', err);
    // `debug` prints nothing unless its namespace is enabled, so also signal the failure
    // through the exit code instead of finishing as if the run had succeeded.
    process.exitCode = 1;
  }
}
// Script entry point: start the translation run as soon as the module is loaded.
// The promise returned by main() is deliberately not awaited or chained here.
void main();
/**
 * Translates a set of source texts into Spanish ('es') and Portuguese ('pt') and writes the
 * results into the corresponding translated files.
 *
 * Both languages are requested in parallel. A file is only updated when its translation
 * request returned at least one text.
 *
 * @param {Object<string, *>} added - Map of trans-unit id to source text.
 * @param {string} translatedFilePathEs - Path of the Spanish XLF file.
 * @param {string} translatedFilePathPt - Path of the Portuguese XLF file.
 * @returns {Promise<void>}
 */
async function processTranslation(added, translatedFilePathEs, translatedFilePathPt) {
  const ids = Object.keys(added);
  if (ids.length === 0) {
    // Nothing to translate: skip the translate calls and the file rewrites altogether.
    return;
  }

  const sourceTexts = ids.map((id) => added[id]);
  const [translatedTextEs, translatedTextPt] = await Promise.all([
    translateTexts(sourceTexts, 'es'),
    translateTexts(sourceTexts, 'pt'),
  ]);

  const outputs = [
    [translatedFilePathEs, translatedTextEs],
    [translatedFilePathPt, translatedTextPt],
  ];
  for (const [filePath, translatedTexts] of outputs) {
    if (translatedTexts.length > 0) {
      await updateTranslationUnit(filePath, pairIdsWithTranslations(ids, translatedTexts));
    }
  }
}

/**
 * Builds an id-to-translation map by position. Ids without a translation at their index are
 * left out, so a short response never writes `undefined` into a unit or marks it as translated.
 */
function pairIdsWithTranslations(ids, translatedTexts) {
  const translationsById = {};
  ids.forEach((id, index) => {
    const translation = translatedTexts[index];
    if (translation !== undefined && translation !== null) {
      translationsById[id] = translation;
    }
  });
  return translationsById;
}
/**
 * Writes translated texts into the matching trans-units of an XLF file, in place.
 *
 * A unit is only modified when its first target element currently carries state "new";
 * its text is then replaced and the state is set to "needs-l10n". Ids that match no unit,
 * units without a target, and targets without a state attribute are skipped. The file is
 * rebuilt and rewritten on every call, then passed through removeBlankLines.
 *
 * @param {string} filePath - Path of the XLF file to update.
 * @param {Object<string, string>} newTranslation - Map of trans-unit id to translated text.
 * @returns {Promise<void>}
 */
async function updateTranslationUnit(filePath, newTranslation) {
  const jsonObj = await parseXlf(filePath);
  const transUnits = jsonObj.xliff.file[0].body[0]['trans-unit'] ?? [];

  // Index the units once instead of scanning the whole list for every id.
  // The first unit with a given id wins, matching what Array.prototype.find returned.
  const unitsById = new Map();
  for (const unit of transUnits) {
    const id = unit.$?.id;
    if (!unitsById.has(id)) {
      unitsById.set(id, unit);
    }
  }

  for (const [id, translatedText] of Object.entries(newTranslation)) {
    // xml2js yields a plain string for a target without attributes and omits the property
    // when the element is absent, so every step down to `state` has to be optional.
    const target = unitsById.get(id)?.target?.[0];
    if (target?.$?.state === 'new') {
      target._ = translatedText;
      target.$.state = 'needs-l10n';
    }
  }

  const builder = new xml2js.Builder();
  const updatedXml = builder.buildObject(jsonObj);
  fs.writeFileSync(filePath, updatedXml, 'utf-8');
  debug(`File ${filePath} updated successfully.`);
  removeBlankLines(filePath);
}
/**
 * Loads an XLF file from disk and parses it with a fresh xml2js parser.
 *
 * @param {string} filePath - Path of the file to read (decoded as UTF-8).
 * @returns {Promise<Object>} The object tree produced by xml2js for the file.
 */
async function parseXlf(filePath) {
  const xml = fs.readFileSync(filePath, 'utf-8');
  return new xml2js.Parser().parseStringPromise(xml);
}
/**
 * Compares the trans-units of two XLF files by id.
 *
 * A file whose body contains no trans-unit elements is treated as having an empty unit list
 * instead of failing, so diffing against an empty baseline reports every unit as added.
 *
 * @param {string} file1Path - Path of the baseline file.
 * @param {string} file2Path - Path of the file compared against the baseline.
 * @returns {Promise<{added: Object<string, *>, removedIds: string[]}>} `added` maps the id of
 *   each unit found only in file 2 to its first source entry; `removedIds` lists the ids
 *   found only in file 1.
 */
async function diffXlfFiles(file1Path, file2Path) {
  const file1 = await parseXlf(file1Path);
  const file2 = await parseXlf(file2Path);

  // xml2js leaves the 'trans-unit' property out when the body has no such children.
  const file1TransUnits = file1.xliff.file[0].body[0]['trans-unit'] ?? [];
  const file2TransUnits = file2.xliff.file[0].body[0]['trans-unit'] ?? [];

  const file1Ids = new Set(file1TransUnits.map((unit) => unit.$.id));
  const file2Ids = new Set(file2TransUnits.map((unit) => unit.$.id));

  const added = {};
  for (const unit of file2TransUnits) {
    if (!file1Ids.has(unit.$.id)) {
      added[unit.$.id] = unit.source[0];
    }
  }

  const removedIds = [...file1Ids].filter((id) => !file2Ids.has(id));
  return { added, removedIds };
}
/**
 * Translates English texts into the given language with the Cloud Translation client.
 *
 * Terms wrapped in '#' (for example "#name#") are swapped for placeholder tokens before the
 * request and restored afterwards, so they are not sent for translation. Each token is closed
 * by a trailing "__": without a terminator, a digit that directly follows a protected term
 * would merge into the token's index and the term could not be restored.
 *
 * Non-string entries are passed to the API untouched. Texts are sent in batches of at most
 * 1024 entries, the documented limit for the `contents` field of a translate request.
 *
 * @param {Array<*>} sourceTexts - Texts to translate.
 * @param {string} targetLanguageCode - Target language code, e.g. 'es' or 'pt'.
 * @returns {Promise<Array<*>>} Translated texts, in the order of `sourceTexts`; an empty
 *   array when there is nothing to translate (no request is made in that case).
 */
async function translateTexts(sourceTexts, targetLanguageCode) {
  if (sourceTexts.length === 0) {
    return [];
  }

  const maxContentsPerRequest = 1024;
  // Protected terms in order of appearance; the array index is the placeholder number.
  const protectedTerms = [];

  // Replace delimited terms with placeholders.
  const textsToTranslate = sourceTexts.map((text) => {
    if (typeof text !== 'string') {
      return text;
    }
    return text.replace(/#(.*?)#/g, (term) => {
      protectedTerms.push(term);
      return `__PLACEHOLDER_${protectedTerms.length - 1}__`;
    });
  });

  const translatedTexts = [];
  for (let start = 0; start < textsToTranslate.length; start += maxContentsPerRequest) {
    const request = {
      parent: `projects/${GOOGLE_PROJECT_ID}/locations/${GOOGLE_LOCATION}`,
      contents: textsToTranslate.slice(start, start + maxContentsPerRequest),
      mimeType: 'text/plain',
      sourceLanguageCode: 'en',
      targetLanguageCode,
    };
    const [response] = await translationClient.translateText(request);
    for (const translation of response.translations) {
      translatedTexts.push(translation.translatedText);
    }
  }

  // Replace placeholders back with the original terms. A token with an unknown index is left
  // as it is rather than being replaced by the text "undefined".
  return translatedTexts.map((text) => {
    if (typeof text !== 'string') {
      return text;
    }
    return text.replace(
      /__PLACEHOLDER_(\d+)__/g,
      (token, index) => protectedTerms[Number(index)] ?? token
    );
  });
}
/**
 * Collects the trans-units of an XLF file whose first target element is still in state "new".
 *
 * Units without a target, and targets without a state attribute (which xml2js returns as a
 * plain string instead of an object), are skipped instead of raising a TypeError. A body
 * without any trans-unit yields an empty result.
 *
 * @param {string} filePath - Path of the translated XLF file to inspect.
 * @returns {Promise<Object<string, *>>} Map of trans-unit id to the unit's first source entry.
 */
async function extractNewTranslations(filePath) {
  const jsonObj = await parseXlf(filePath);
  const transUnits = jsonObj.xliff.file[0].body[0]['trans-unit'] ?? [];

  const newTranslations = {};
  for (const unit of transUnits) {
    if (unit.target?.[0]?.$?.state === 'new') {
      newTranslations[unit.$.id] = unit.source[0];
    }
  }
  return newTranslations;
}
/**
 * Rewrites a file in place without its blank lines.
 *
 * The content is split on '\n'; every line that is empty or whitespace-only after trimming is
 * dropped and the remaining lines are joined with '\n' again.
 *
 * @param {string} filePath - Path of the file to clean (read and written as UTF-8).
 * @returns {void}
 */
function removeBlankLines(filePath) {
  const keptLines = [];
  for (const line of fs.readFileSync(filePath, 'utf-8').split('\n')) {
    if (line.trim() !== '') {
      keptLines.push(line);
    }
  }
  fs.writeFileSync(filePath, keptLines.join('\n'), 'utf-8');
  debug(`Blank lines removed from ${filePath} successfully.`);
}