Tuesday, 23 January 2018

Count lines and their occurrences

The two functions below build a list of every distinct line in a given file or string, together with the number of times each line occurs.

/**
 * LineStatisticsByFile
 * Creates a list with all lines of the given file and their occurrences.
 *
 * The file is read completely and its contents are handed to
 * LineStatisticsByString(). The script is terminated via die() when the
 * file does not exist or its contents cannot be read.
 *
 * @param     string    $Filepath      Path of the file to analyse
 * @param     bool      $IgnoreCase    Passed through to LineStatisticsByString()
 * @param     string    $NewLine       Line separator, passed through to LineStatisticsByString()
 * @return    string
 */
function LineStatisticsByFile($Filepath, $IgnoreCase=false, $NewLine="\n"){

    if (!file_exists($Filepath)){
        $ErrorMsg  = 'LineStatisticsByFile error: ';
        $ErrorMsg .= 'The given file ' . $Filepath . ' does not exist!';
        die($ErrorMsg);
    }

    $Contents = file_get_contents($Filepath);

    // file_exists() is also true for directories and unreadable files,
    // in which case file_get_contents() returns false. Stop here instead
    // of silently producing an empty statistic.
    if ($Contents === false){
        $ErrorMsg  = 'LineStatisticsByFile error: ';
        $ErrorMsg .= 'The given file ' . $Filepath . ' could not be read!';
        die($ErrorMsg);
    }

    return LineStatisticsByString($Contents, $IgnoreCase, $NewLine);
}

/**
 * LineStatisticsByString
 * Creates a list with all lines of the given string and their occurrences.
 *
 * Every line is trimmed and empty lines are skipped. The result contains
 * one "line: count" entry per distinct line, ordered by count (highest
 * first) and always joined with "\n", independent of $NewLine.
 *
 * @param     string|array    $Lines         Text to analyse; an array is joined with $NewLine first
 * @param     bool            $IgnoreCase    If true, lines are lower-cased with strtolower() before counting
 * @param     string          $NewLine       Separator used to split $Lines into single lines
 * @return    string
 */
function LineStatisticsByString($Lines, $IgnoreCase=false, $NewLine="\n"){

    // Arrays are joined first so that elements which themselves contain
    // the separator are split into single lines as well
    if (is_array($Lines))
        $Lines = implode($NewLine, $Lines);

    $LineArray = array();

    // Go through all lines of the given string
    foreach (explode($NewLine, $Lines) as $RawLine){

        // Trim whitespace for the current line
        $LineKey = trim($RawLine);

        // Skip empty lines
        if ($LineKey === '')
            continue;

        if ($IgnoreCase)
            $LineKey = strtolower($LineKey);

        // Use the line contents as array key and increase its counter
        if (isset($LineArray[$LineKey]))
            $LineArray[$LineKey] += 1;
        else
            $LineArray[$LineKey] = 1;
    }

    // Sort by counter, highest first, keeping the line => counter association
    arsort($LineArray);

    // Create a new readable array for the output.
    // foreach replaces each(), which was removed in PHP 8.0.
    $NewLineArray = array();
    foreach ($LineArray as $LineKey => $LineValue){
        $NewLineArray[] = $LineKey . ': ' . $LineValue;
    }

    // Return the statistic, one "line: count" entry per row
    return implode("\n", $NewLineArray);
}

// Build the line statistic for the file "Testfile.txt".
// The call terminates the script via die() if that file does not exist.
$ResultString = LineStatisticsByFile('Testfile.txt');

/*
Example: if Testfile.txt contains "LineA" three times, "LineB" twice
and "LineC" once, the ResultString now contains:

LineA: 3
LineB: 2
LineC: 1
*/

// Shorter alternative using built-ins only: file() returns the lines of
// test.txt as an array (without trailing newlines, skipping empty lines)
// and array_count_values() maps each distinct line to its number of occurrences.
$stat = array_count_values(file('test.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES));

Or, for the case-insensitive version:

// Lower-case every line first so that lines differing only in case share one counter
$stat = array_count_values(array_map('strtolower', file('test.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)));

// Dump the resulting line => count array
print_r($stat);

0 comments:

Post a Comment