Powered By Blogger

Search Here!

Showing posts with label Content. Show all posts
Showing posts with label Content. Show all posts

Tuesday, October 8, 2024

Get Pdf Text Content Line Based

 

const fs = require("fs");
const path = require("path");
const pdfParse = require("pdf-parse");
/**
 * Reads the text content of a PDF file located in the download directory
 * (`CONSTANTS.FILE.FILE_PATH`), logs every line as a table, and returns
 * either one line or all lines of the extracted text.
 *
 * @param {string|null} [fileName=null] - The name of the PDF file inside the
 *     download directory. If null, the name returned by
 *     `this.getLatestDownloadedFileName()` is used instead.
 * @param {number|null} [lineToCheck=null] - The line number in the PDF
 *     content to retrieve (1-based index). If null, all lines are returned.
 * @returns {Promise<string|string[]>} The trimmed text of the requested line,
 *     an empty string if the line number is not a whole number within
 *     1..lineCount, or an array of all trimmed lines when lineToCheck is null.
 *     Errors raised while reading or parsing the file are not caught here,
 *     so the returned promise rejects with them.
 */
async function getPdfTextContentLineBased(fileName = null,
                lineToCheck = null) {
    // Directory that contains the downloaded files
    const downloadDir = CONSTANTS.FILE.FILE_PATH;

    let targetFileName = fileName;

    if (fileName !== null) {
        console.log(`Reading content from the specified file: ${fileName}`);
    } else {
        // NOTE(review): this branch depends on `this`, so the function must be
        // invoked as a method (or bound) on an object that provides
        // getLatestDownloadedFileName() - confirm against the caller.
        targetFileName = await this.getLatestDownloadedFileName();
        console.log(
            `Reading content from the latest downloaded file: ${targetFileName}`
        );
    }

    const pdfFilePath = path.join(downloadDir, targetFileName);

    // Non-blocking read: the function is already async, so there is no reason
    // to stall the event loop with readFileSync.
    const dataBuffer = await fs.promises.readFile(pdfFilePath);

    // Parse the PDF and extract its text
    const data = await pdfParse(dataBuffer);

    // Split into lines and trim once; the same array feeds the table output
    // and the return value.
    const lines = data.text.split("\n").map((line) => line.trim());

    // Print all lines of the PDF text content
    console.log(`Content of the file ${fileName || "latest downloaded PDF"}:`);
    console.table(
        lines.map((content, index) => ({ Line: index + 1, Content: content }))
    );

    // No line requested: return every line
    if (lineToCheck === null) {
        return lines;
    }

    // Number() keeps numeric strings such as "2" working; the isInteger check
    // rejects NaN and fractional values, which would otherwise index
    // `undefined` and crash.
    const lineNumber = Number(lineToCheck);
    const isInRange = Number.isInteger(lineNumber) &&
        lineNumber >= 1 && lineNumber <= lines.length;

    if (!isInRange) {
        console.warn(`Line number ${lineToCheck} is out of range.`);
        return ""; // Empty string signals an invalid/out-of-range line number
    }

    return lines[lineNumber - 1];
}