Tuesday, 23 January 2018

Google texter - v0.2

This experimental script creates random text by using Google search as its source.

/*
** Google texter - v0.2
** This experimental script creates a random text by
** using google search as source.
**
** Warning: Do not execute this script on a webserver!
** This script was made to execute from a command line.
**
** (c) Jonas John, <www.jonasjohn.de>
** License: BSD (http://en.wikipedia.org/wiki/BSD_licenses)
*/

//
// Configuration:
//
// $words, $text_length and $search_length are passed to google_texter()
// below; $google_pages and $google_server are read as globals by get_word().
//

// start words: printed first and used as the initial search phrase
$words = 'php is';

// stop the script after X words have been appended to the start words
$text_length = 100;

// use the last X words of the generated text as the search phrase
$search_length = 3;

// number of search results to request (sent as the "num" query parameter)
$google_pages = 50;

// host name of the google server that is queried
$google_server = 'www.google.com';


// start the search process
google_texter($words, $text_length, $search_length);



/**
 * Downloads the given URL with cURL and returns the response body.
 *
 * @param string $url  URL to download
 * @return string      the response body, or an empty string if the
 *                     transfer could not be started or failed
 */
function get_url($url){

    // create a new curl resource; curl_init() returns false on failure
    $ch = curl_init();
    if ($ch === false){
        return '';
    }

    // set URL to download
    curl_setopt($ch, CURLOPT_URL, $url);

    // user agent:
    $browser = "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.0.4)";
    curl_setopt($ch, CURLOPT_USERAGENT, $browser);

    // do not include the response headers in the returned data
    curl_setopt($ch, CURLOPT_HEADER, 0);

    // should curl return or print the data? true = return, false = print
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    // timeout in seconds
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);

    // download the given URL
    $output = curl_exec($ch);

    // close the curl resource, and free system resources
    curl_close($ch);

    // curl_exec() returns false when the transfer fails (timeout, DNS
    // error, ...); normalise that so callers always receive a string
    if ($output === false){
        return '';
    }

    return $output;
}

/**
 * Searches google for the last $search_length words of $words as an exact
 * phrase and returns a word that frequently follows that phrase in the
 * result page.
 *
 * Reads the globals $google_pages (sent as the "num" query parameter) and
 * $google_server (host name of the search server).
 *
 * @param string $words          text generated so far, words separated by spaces
 * @param int    $search_length  number of trailing words used as search phrase
 * @return string  the most frequent or second most frequent follow-up word
 *                 (picked at random), or '' if no follow-up word was found
 */
function get_word($words, $search_length){
    global $google_pages;
    global $google_server;

    // keep only the last $search_length words as the search phrase
    $words = explode(" ", $words);
    $_words = array_slice($words, ($search_length * -1));
    $words = implode(' ', $_words);
    $words = strtolower($words);

    // build the search URL; %22 are double quotes (exact phrase search)
    $url = 'http://'.$google_server.'/';
    $url .= 'search?num='.$google_pages.'&';
    $url .= 'q=%22'.urlencode($words).'%22&';
    $url .= 'btnG=Search';

    $content = get_url($url);

    // reduce the page to lower-case text and turn line breaks, tabs and
    // some punctuation into spaces (replacements are applied in order)
    $content = strip_tags($content);
    $content = strtolower($content);
    $content = str_replace(
        array("\r\n", "\r", "\n", "\t", "<", ">", '"', "'", "-", "."),
        " ",
        $content
    );

    // The phrase is inserted into a regular expression, so it has to be
    // escaped: generated words may contain "?" or "!" and the start words
    // may contain any character (including the "/" delimiter).
    $phrase = preg_quote($words, '/');

    // Collect every word that directly follows the phrase. With the "u"
    // modifier \p{L} matches letters of any alphabet (umlauts etc.).
    $m = array();
    $found = preg_match_all('/'.$phrase.' ([0-9\p{L}?!]+)/u', $content, $m);

    if ($found === false){
        // "u" makes the match fail on text that is not valid UTF-8;
        // fall back to a byte-wise match of ASCII words in that case
        $m = array();
        preg_match_all('/'.$phrase.' ([0-9a-zA-Z?!]+)/', $content, $m);
    }

    $next_word = isset($m[1]) ? $m[1] : array();

    // order the candidate words by how often they occurred (descending)
    $next_word = array_count_values($next_word);
    arsort($next_word);
    $next_word = array_keys($next_word);

    // randomly pick the most frequent or the second most frequent word
    $r = rand(0,1);

    // array keys of numeric words are integers, hence the string casts
    if (isset($next_word[$r])){
        return (string) $next_word[$r];
    }

    if (isset($next_word[0])){
        return (string) $next_word[0];
    }
    return '';
}

/**
 * Prints the start words and then keeps appending words returned by
 * get_word() until $text_length words were added or no further word
 * could be found.
 *
 * @param string $start_words    words the text starts with
 * @param int    $text_length    maximum number of words to append
 * @param int    $search_length  number of trailing words get_word() uses
 *                               as search phrase
 * @return void  the text is printed, nothing is returned
 */
function google_texter($start_words, $text_length, $search_length){

    $word = $start_words;
    print $word;

    for ($x=0; $x < $text_length; $x++){
        $w = get_word($word, $search_length);

        // get_word() returns '' when nothing follows the phrase. Appending
        // it would put an empty word at the end of the text, which then
        // becomes part of every later search phrase, so all remaining
        // iterations would only send further requests - stop instead.
        if ($w === ''){
            break;
        }

        $word .= ' ' . $w;
        print ' ' . $w;
    }

}

<b>Example output:</b><br/><br/><b>php is</b> ... not the answer by blue october on the south beach diet is not a business associate agreement is required to use open source<br/><br/><b>java is</b> ... not my favorite language for many system management tasks for the communicative signals this worksheet to the students<br/><br/><b>google says</b> ... it has no interest in the public sector is a huge mistake by the lake is approximately 10 minutes from the ordinary general shareholders

0 comments:

Post a Comment