Powered By Blogger

Search Here!

Tuesday, October 8, 2024

Read Excel Content


const XLSX = require("xlsx");
/**
 * Reads the first worksheet of an Excel file from the download
 * directory and returns its rows.
 *
 * The file is looked up under `CONSTANTS.FILE.FILE_PATH`. Every row
 * is also printed to the console, one `key: value` pair per line.
 *
 * @param {string|null} [fileName=null] - The name of the Excel file.
 *   If null, the name returned by `this.getLatestDownloadedFileName()`
 *   is used, so the function must then be invoked with a `this` that
 *   provides that method.
 * @param {number|null} [rowToCheck=null] - 1-based index into the
 *   array produced by `XLSX.utils.sheet_to_json`. If null, all rows
 *   are returned.
 * @returns {Promise<Object|Object[]>} The requested row, an empty
 *   object when `rowToCheck` is not a whole number within range, or
 *   every row when `rowToCheck` is null.
 */
async function getExcelContent(fileName = null, rowToCheck = null) {
    const downloadDir = CONSTANTS.FILE.FILE_PATH;

    // Decide which file to open: the caller's, or the latest download.
    let targetFileName = fileName;
    if (targetFileName !== null) {
        console.log(
            `Reading content from the specified file: ${targetFileName}`
        );
    } else {
        targetFileName = await this.getLatestDownloadedFileName();
        console.log(
            "Reading content from the latest downloaded file: " +
                `${targetFileName}`
        );
    }
    const excelFilePath = `${downloadDir}/${targetFileName}`;

    // Only the first sheet of the workbook is converted to row objects.
    const workbook = XLSX.readFile(excelFilePath);
    const firstSheetName = workbook.SheetNames[0];
    const data = XLSX.utils.sheet_to_json(workbook.Sheets[firstSheetName]);

    // Dump every row so the content is visible in the run log.
    const label = fileName || "latest downloaded Excel";
    console.log(`Content of the file ${label}:`);
    data.forEach((row, index) => {
        console.log(`Row ${index + 1}:`);
        for (const [key, value] of Object.entries(row)) {
            console.log(`  ${key}: ${value}`);
        }
        console.log(""); // Blank line between rows for readability
    });

    if (rowToCheck === null) {
        return data; // No row requested: return all rows
    }

    // Coerce first so numeric strings keep working, then reject anything
    // that is not a whole number inside the data; a fractional or NaN
    // index would otherwise slip past the range check and yield undefined.
    const rowNumber = Number(rowToCheck);
    const isValidRow =
        Number.isInteger(rowNumber) &&
        rowNumber >= 1 &&
        rowNumber <= data.length;
    if (!isValidRow) {
        console.warn(`Row number ${rowToCheck} is out of range.`);
        return {};
    }

    return data[rowNumber - 1];
}

Get PDF Page Count

 

const pdfParse = require("pdf-parse"); 
/**
 * Retrieves the total number of pages in a specified or latest
 * downloaded PDF file.
 *
 * The file is looked up under `CONSTANTS.FILE.FILE_PATH`, read
 * synchronously with `fs.readFileSync`, and parsed with `pdfParse`;
 * `fs`, `pdfParse` and `CONSTANTS` must be in scope where this
 * function is defined.
 *
 * @param {string|null} [fileName=null] - The name of the PDF file.
 *   If null, the name returned by `this.getLatestDownloadedFileName()`
 *   is used, so the function must then be invoked with a `this` that
 *   provides that method.
 * @returns {Promise<number>} The `numpages` value reported by
 *   `pdfParse` for the file.
 */
async function getPdfPageCount(fileName = null) {
    const downloadDir = CONSTANTS.FILE.FILE_PATH;

    // Decide which file to open: the caller's, or the latest download.
    let targetFileName = fileName;
    if (targetFileName !== null) {
        console.log(
            `Getting page count from the specified file: ${targetFileName}`
        );
    } else {
        targetFileName = await this.getLatestDownloadedFileName();
        console.log(
            "Getting page count from the latest downloaded file: " +
                `${targetFileName}`
        );
    }
    const pdfFilePath = `${downloadDir}/${targetFileName}`;

    // Load the raw bytes and let pdf-parse extract the document info.
    const dataBuffer = fs.readFileSync(pdfFilePath);
    const data = await pdfParse(dataBuffer);

    // numpages is a page count, not a page index.
    return data.numpages;
}

Get PDF Text Content Line by Line

 

const pdfParse = require("pdf-parse"); 
/**
 * Reads the text content from a specified or latest downloaded
 * PDF file and returns it line by line.
 *
 * The file is looked up under `CONSTANTS.FILE.FILE_PATH`, read
 * synchronously with `fs.readFileSync`, and parsed with `pdfParse`;
 * `fs`, `pdfParse` and `CONSTANTS` must be in scope where this
 * function is defined. All lines are also printed with
 * `console.table`.
 *
 * @param {string|null} [fileName=null] - The name of the PDF file.
 *   If null, the name returned by `this.getLatestDownloadedFileName()`
 *   is used, so the function must then be invoked with a `this` that
 *   provides that method.
 * @param {number|null} [lineToCheck=null] - The line number in the
 *   PDF text to retrieve (1-based index). If null, all lines are
 *   returned.
 * @returns {Promise<string|string[]>} The trimmed text of the
 *   requested line, an empty string when `lineToCheck` is not a whole
 *   number within range, or every trimmed line when `lineToCheck` is
 *   null.
 */
async function getPdfTextContentLineBased(fileName = null,
                lineToCheck = null) {
    const downloadDir = CONSTANTS.FILE.FILE_PATH;

    // Decide which file to open: the caller's, or the latest download.
    let targetFileName = fileName;
    if (targetFileName !== null) {
        console.log(
            `Reading content from the specified file: ${targetFileName}`
        );
    } else {
        targetFileName = await this.getLatestDownloadedFileName();
        console.log(
            "Reading content from the latest downloaded file: " +
                `${targetFileName}`
        );
    }
    const pdfFilePath = `${downloadDir}/${targetFileName}`;

    // Load the raw bytes and let pdf-parse extract the text.
    const dataBuffer = fs.readFileSync(pdfFilePath);
    const data = await pdfParse(dataBuffer);

    // Split on newlines and trim once; trimming also removes any
    // trailing carriage return left by CRLF line endings.
    const lines = data.text.split("\n").map((line) => line.trim());

    // Print all lines of the PDF text content
    const label = fileName || "latest downloaded PDF";
    console.log(`Content of the file ${label}:`);
    console.table(
        lines.map((line, index) => ({ Line: index + 1, Content: line }))
    );

    if (lineToCheck === null) {
        return lines; // No line requested: return all lines
    }

    // Coerce first so numeric strings keep working, then reject anything
    // that is not a whole number inside the text; a fractional or NaN
    // index would otherwise pass the range check and crash on
    // `undefined.trim()`.
    const lineNumber = Number(lineToCheck);
    const isValidLine =
        Number.isInteger(lineNumber) &&
        lineNumber >= 1 &&
        lineNumber <= lines.length;
    if (!isValidLine) {
        console.warn(`Line number ${lineToCheck} is out of range.`);
        return "";
    }

    return lines[lineNumber - 1];
}