const fs = require("fs");
const path = require("path");

const pdfParse = require("pdf-parse");
/**
 * Reads the text content of a PDF file from the download directory and
 * returns it line by line.
 *
 * The file is looked up under `CONSTANTS.FILE.FILE_PATH`. When no file name is
 * given, the name is obtained from `this.getLatestDownloadedFileName()`, so in
 * that case the function must be invoked with a `this` that provides that
 * method (presumably it is attached to a helper/page object; confirm against
 * the caller).
 *
 * Every line is trimmed, and the full list of lines is printed with
 * `console.table` before the function returns.
 *
 * @param {string|null} [fileName=null] - Name of the PDF file inside the
 *   download directory. If null, the latest downloaded file is used.
 * @param {number|null} [lineToCheck=null] - 1-based line number to retrieve.
 *   Numeric strings are accepted. If null, all lines are returned.
 * @returns {Promise<string|string[]>} The trimmed text of the requested line,
 *   an empty string when `lineToCheck` is not a whole number within
 *   `1..lines.length`, or the array of all trimmed lines when `lineToCheck`
 *   is null.
 * @throws The returned promise rejects if the file cannot be read or if
 *   `pdfParse` rejects.
 */
async function getPdfTextContentLineBased(fileName = null, lineToCheck = null) {
  const downloadDir = CONSTANTS.FILE.FILE_PATH;

  // Resolve which file to read: the explicit name, or the latest download.
  let pdfFileName = fileName;
  if (pdfFileName !== null) {
    console.log(`Reading content from the specified file: ${pdfFileName}`);
  } else {
    pdfFileName = await this.getLatestDownloadedFileName();
    console.log(
      `Reading content from the latest downloaded file: ${pdfFileName}`
    );
  }
  const pdfFilePath = path.join(downloadDir, pdfFileName);

  // Non-blocking read: this is an async function, so avoid readFileSync.
  const dataBuffer = await fs.promises.readFile(pdfFilePath);
  const data = await pdfParse(dataBuffer);

  // Trim once up front; trimming also strips a trailing "\r" from CRLF text.
  const lines = data.text.split("\n").map((line) => line.trim());

  console.log(`Content of the file ${fileName || "latest downloaded PDF"}:`);
  console.table(
    lines.map((content, index) => ({ Line: index + 1, Content: content }))
  );

  if (lineToCheck === null) {
    return lines;
  }

  // Coerce so numeric strings keep working, then require a whole number in
  // range. A fractional or non-numeric value would otherwise index
  // `undefined` and crash instead of being reported as out of range.
  const lineNumber = Number(lineToCheck);
  const isInRange =
    Number.isInteger(lineNumber) &&
    lineNumber >= 1 &&
    lineNumber <= lines.length;
  if (!isInRange) {
    console.warn(`Line number ${lineToCheck} is out of range.`);
    return "";
  }

  return lines[lineNumber - 1];
}
// NOTE: stray blog-page footer text ("No comments: / Post a Comment"); not part of the code.