const EventEmitter = require('events');
var uuid = require('uuid'); // required for generating timebased guids
var fs = require('fs'); // required for filesystem functionality
const Path = require('path');
const chokidar = require('chokidar');
const Parser = require(__dirname + '/parser.util.js');
const gmObj = require('gm'); // Image Magic for Merging Fax pages
/**
 * Watches the incoming fax directory and drives every new fax file through
 * PDF -> PNG conversion (Ghostscript), page merging (gm / ImageMagick) and
 * OCR (project Parser).
 *
 * Events with a listener registered in the constructor:
 *  - 'new-fax-pdf'            (path)
 *  - 'new-fax-png'            (path)
 *  - 'new-converted-fax-pngs' (analysisPathPng, fileName)
 * Events emitted without a listener in this class:
 *  - 'new-fax-jpeg' (path), 'watcher-initialized'
 */
class IlsFaxParser extends EventEmitter {
  constructor() {
    super();
    this.watcher = null; // chokidar watcher, created by initializeWatcher()
    this.gs = require('ghostscript4js');
    this.gm = gmObj.subClass({imageMagick: true});
    this.config = require(__dirname + '/../../../config/config.js'); // Read global config
    this.faxIncomingPath = 'C:\\Temp\\fax\\incoming';
    this.faxArchivePath = 'C:\\Temp\\fax\\archive';
    this.fileName = '';
    this.analysisPathPng = '';
    this.analysisPath = '';
    this.parser = new Parser();

    this.on('new-fax-pdf', (path) => {
      this.convertPDF(path);
    });
    this.on('new-fax-png', (path) => {
      this.startOCR(path);
    });
    // fileName is passed along with the event by convertPDF(); the fallback to
    // this.fileName keeps emitters that only send the folder working.
    this.on('new-converted-fax-pngs', (analysisPathPng, fileName = this.fileName) => {
      this.LogAtMain('Counting PNG files after PDF conversion...');
      this.getNumberOfPagesInFolder(analysisPathPng, (pagesCounter) => {
        if (pagesCounter > 1) {
          this.LogAtMain('more than one page --> merging pages needed before OCR');
          this.mergePages(analysisPathPng, (mergeSuccess) => {
            // After Merging pages --> Start OCR
            if (mergeSuccess === true) {
              this.LogAtMain('Starting OCR ...');
              this.parser.ocr(analysisPathPng + '/ocr.png');
            }
          });
        } else {
          this.LogAtMain('only one page, no merging needed');
          this.LogAtMain('Starting OCR ...');
          this.parser.ocr(analysisPathPng + fileName + '001.png');
        }
      });
    });
  }

  /**
   * Sends a log message to the parent process as a JSON string of the form
   * {"logmessage": "<msg>"}.
   * The payload is built with JSON.stringify so that quotes and backslashes
   * (e.g. in Windows paths) cannot produce invalid JSON. When the process has
   * no IPC channel (process.send is not a function) the payload is written to
   * stdout instead of throwing.
   * @param {*} msg message to log; converted to a string
   */
  LogAtMain(msg) {
    const payload = JSON.stringify({logmessage: String(msg)});
    if (typeof process.send === 'function') {
      process.send(payload);
    } else {
      console.log(payload);
    }
  }

  /**
   * Checks the configured working directories for existence.
   * If they do not exist, tries to create them (including missing parents).
   * @param {function(boolean)} callback called synchronously with true when
   *   both directories exist afterwards, false when a path is empty or a
   *   directory could not be created
   */
  checkWorkingDirectories(callback) {
    let result = false;
    if ((this.faxIncomingPath.length > 0) && (this.faxArchivePath.length > 0)) {
      // Evaluate both so that each directory is attempted even if one fails.
      const incomingOk = this.ensureDirectory(this.faxIncomingPath, 'Incoming Fax Directory', ' | ');
      const archiveOk = this.ensureDirectory(this.faxArchivePath, 'Fax Archive Directory');
      result = incomingOk && archiveOk;
    }
    callback(result);
  }

  /**
   * Helper for checkWorkingDirectories: makes sure a single directory exists.
   * @param {string} dir directory path
   * @param {string} label human readable name used in log messages
   * @param {string} [logPrefix] prefix for the "not existing" log message
   * @returns {boolean} true when the directory exists or was created
   */
  ensureDirectory(dir, label, logPrefix = '') {
    if (fs.existsSync(dir)) {
      return true;
    }
    this.LogAtMain(logPrefix + 'configured ' + label + ' ' + dir + ' not existing! Try creating directory');
    try {
      fs.mkdirSync(dir, {recursive: true});
    } catch (err) {
      this.LogAtMain('configured ' + label + ' ' + dir + ' could not be created: ' + err);
      return false;
    }
    this.LogAtMain('configured ' + label + ' ' + dir + ' has successfully been created');
    return true;
  }

  /**
   * Creates the analysis folder <faxArchivePath>/<folderGuuid> with its
   * 'png' and 'txt' subfolders. Missing subfolders are also created when the
   * base folder already exists.
   * @param {string} folderGuuid
   * @param {function(string)} callback called synchronously with the folder path
   * @throws when a folder cannot be created (error from fs.mkdirSync)
   */
  createAnylysisFolder(folderGuuid, callback) {
    const dir = this.faxArchivePath + '/' + folderGuuid;
    const isNew = !fs.existsSync(dir);
    fs.mkdirSync(dir + '/png', {recursive: true});
    fs.mkdirSync(dir + '/txt', {recursive: true});
    if (isNew) {
      this.LogAtMain('creating Analysis Folder ' + dir);
    }
    callback(dir);
  }

  /**
   * Starts watching the incoming fax directory and emits
   * 'watcher-initialized' once the watcher object has been set up.
   */
  initializeWatcher() {
    this.watcher = chokidar.watch(this.faxIncomingPath, {
      ignored: /(^|[\/\\])\../, // ignore dotfiles
      persistent: true,
      ignoreInitial: true, // Make sure only new files after service start are processed
      awaitWriteFinish: true, // makes sure, file is completely written to disk before service accesses it
      usePolling: true // needed especially for network shares
    });
    this.watcher
      .on('add', (path) => {
        this.processNewFax(path);
      })
      .on('change', (path) => {
        this.LogAtMain(`File ${path} has been changed`);
      })
      .on('unlink', (path) => {
        this.LogAtMain(`File ${path} has been removed`);
      })
      .on('error', (error) => {
        this.LogAtMain(`Watcher error: ${error}`);
      })
      .on('ready', () => {
        this.LogAtMain('Initial scan of incoming Fax Path complete. Ready for incoming faxes');
      });
    this.emit('watcher-initialized');
  }

  /**
   * Assigns a fresh event guid / file name to a new fax file and dispatches
   * it by file extension (matched case-insensitively).
   * @param {string} path path of the new file in the incoming folder
   */
  processNewFax(path) {
    this.LogAtMain('New Faximile file found in incoming folder: ' + encodeURIComponent(path));
    this.eventguuid = uuid.v1();
    const extension = Path.extname(path);
    this.fileName = Path.basename(path, extension) + this.eventguuid;
    this.LogAtMain('the file ' + encodeURIComponent(path) + ' was created in incoming folder');
    switch (extension.toLowerCase()) {
      case '.jpg':
        // JPEG File identified
        this.emit('new-fax-jpeg', path);
        break;
      case '.png':
        // png file identified
        this.emit('new-fax-png', path);
        this.LogAtMain('PNG File found | Starting OCR ...');
        break;
      case '.pdf':
        // pdf file identified
        this.emit('new-fax-pdf', path);
        this.LogAtMain('PDF File found | Starting conversion ...');
        break;
      default:
        this.LogAtMain('Unknown File Extension. Can not handle files with extension ' + extension);
        break;
    }
  }

  /**
   * Converts a PDF file to PNG file(s) inside a fresh analysis folder and
   * emits 'new-converted-fax-pngs' (analysisPathPng, fileName) on success.
   * Failures (folder creation, Ghostscript rejection) are logged.
   * NOTE(review): the Ghostscript arguments are passed as one space separated
   * string, so paths containing spaces will presumably break - confirm.
   * @param {string} path path of the PDF file
   */
  convertPDF(path) {
    // Capture now: this.fileName is overwritten as soon as the next fax
    // arrives, which can happen while Ghostscript is still running.
    const fileName = this.fileName;
    try {
      this.createAnylysisFolder(this.eventguuid, (analysisPath) => {
        const analysisPathPng = analysisPath + '/png/';
        const param_ghostscript = '-dSAFER -dQUIET -q -SDEVICE=png16m -dINTERPOLATE -dNumRenderingThreads=8 -dFirstPage=1 -dLastPage=10 -r300 -o ' + analysisPathPng + fileName + '%03d.png -c 3000000 setvmthreshold -f ' + path;
        this.LogAtMain('Starting Ghostscript: ' + param_ghostscript);
        this.gs.execute(param_ghostscript)
          .then(() => {
            // Ghostscript returned successfully
            this.LogAtMain('Successfully converted PDF to PNG');
            this.emit('new-converted-fax-pngs', analysisPathPng, fileName);
          })
          .catch((err) => {
            // A rejected promise is not seen by the surrounding try/catch
            this.LogAtMain('Error converting PDF document to OCR readbable format: ' + err);
          });
      });
    } catch (err) {
      // Synchronous failure (folder creation or Ghostscript invocation)
      this.LogAtMain('Error converting PDF document to OCR readbable format: ' + err);
    }
  }

  /**
   * Returns the number of files in a folder.
   * @param {string} folder
   * @param {function(number)} callback receives the file count, 0 on error
   */
  getNumberOfPagesInFolder(folder, callback) {
    fs.readdir(folder, (err, files) => {
      if (err) {
        this.LogAtMain('error reading number of fax pages in folder ' + folder);
        callback(0);
        return;
      }
      this.LogAtMain('Fax has ' + files.length + ' pages');
      callback(files.length);
    });
  }

  /**
   * Merges the files of a folder (2 or 3 pages) into <folder>/ocr.png.
   * Files are processed in sorted name order. The callback is always called:
   * with true after the merged file was written, with false on any error or
   * when the number of files is not supported.
   * @param {string} folder
   * @param {function(boolean)} callback
   */
  mergePages(folder, callback) {
    fs.readdir(folder, (err, files) => {
      if (err) {
        this.LogAtMain('Error merging pdf pages to single page: ' + err);
        callback(false);
        return;
      }
      // readdir order is not guaranteed; page files carry a zero padded
      // counter so a plain string sort yields page order.
      const pages = [...files].sort();
      if (pages.length < 2 || pages.length > 3) {
        this.LogAtMain('Error merging pdf pages to single page: merging ' + pages.length + ' pages is not supported');
        callback(false);
        return;
      }
      // The last page is the gm source image, earlier pages are added through
      // montage() (as in the two page case) - presumably gm places montage
      // inputs before the source image; verify against the gm documentation.
      let montage = this.gm(folder + '/' + pages[pages.length - 1]);
      pages.slice(0, -1).forEach((page, index) => {
        montage = montage
          .montage(folder + '/' + page)
          .geometry('+0%+' + ((index + 1) * 100) + '%')
          .tile('1x');
      });
      montage
        .out('-define')
        .out('png:color-type=2')
        // Arrow function: the write callback must keep `this` bound to the parser
        .write(folder + '/ocr.png', (writeErr) => {
          if (!writeErr) {
            this.LogAtMain('Written merged fax png');
            callback(true);
          } else {
            this.LogAtMain('Error merging pdf pages to single page: ' + writeErr);
            callback(false);
          }
        });
    });
  }

  /**
   * Hands a PNG file to the parser's ocr() method.
   * @param {string} path
   */
  startOCR(path) {
    this.parser.ocr(path);
  }
}
module.exports = IlsFaxParser;