This experimental script creates random text by using Google search as its source.
/*
** Google texter - v0.2
** This experimental script creates a random text by
** using google search as source.
**
** Warning: Do not execute this script on a webserver!
** This script was made to execute from a command line.
**
** (c) Jonas John, <www.jonasjohn.de>
** License: BSD (http://en.wikipedia.org/wiki/BSD_licenses)
*/
//
// Configuration:
//
// start words to search for:
$words = 'php is';
// stop the script after X generated words (one search request per word)
$text_length = 100;
// number of trailing words of the text that form each search phrase
$search_length = 3;
// number of results requested per search (sent as the "num" query parameter)
$google_pages = 50;
// use this google server
$google_server = 'www.google.com';
// The script sends up to $text_length HTTP requests in a row and is meant
// for the command line only, so refuse to run under a web server SAPI.
if (!in_array(PHP_SAPI, array('cli', 'phpdbg'), true)){
    exit('This script must be run from the command line.');
}
// start the search process
google_texter($words, $text_length, $search_length);
/**
 * Download a URL with cURL and hand back the response.
 *
 * @param string $url address to fetch
 * @return mixed the result of curl_exec(): the response body as a string
 *               (CURLOPT_RETURNTRANSFER is enabled), or false on failure
 */
function get_url($url){
    $handle = curl_init();

    // All transfer settings in one place, applied in the listed order.
    curl_setopt_array($handle, array(
        // address to download
        CURLOPT_URL            => $url,
        // pretend to be a desktop browser
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.0.4)",
        // 0 = leave the response headers out of the returned data
        CURLOPT_HEADER         => 0,
        // true = return the data instead of printing it
        CURLOPT_RETURNTRANSFER => true,
        // give up after this many seconds
        CURLOPT_TIMEOUT        => 10,
    ));

    // perform the transfer, then release the handle
    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
/**
 * Search for the phrase made of the last $search_length words of $words and
 * return one of the words that most often follows that phrase in the
 * downloaded result page.
 *
 * Reads the globals $google_pages (sent as the "num" query parameter) and
 * $google_server (host name used in the search URL).
 *
 * @param string $words         text generated so far, words separated by blanks
 * @param int    $search_length how many trailing words form the search phrase
 * @return string the chosen follow-up word, or '' when none was found
 */
function get_word($words, $search_length){
    global $google_pages;
    global $google_server;

    // Split on whitespace runs and drop empty tokens, so a trailing or doubled
    // blank in $words cannot end up inside the search phrase.
    $tokens = preg_split('/\s+/', (string) $words, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false){
        $tokens = array();
    }

    // keep only the last $search_length words, lower-cased like the page text below
    $phrase = strtolower(implode(' ', array_slice($tokens, ($search_length * -1))));

    // exact-phrase search: the phrase is wrapped in %22 (double quotes)
    $url = 'http://'.$google_server.'/';
    $url .= 'search?num='.$google_pages.'&';
    $url .= 'q=%22'.urlencode($phrase).'%22&';
    $url .= 'btnG=Search';

    $content = get_url($url);
    if (!is_string($content) || $content === ''){
        // download failed or came back empty: nothing to mine
        return '';
    }

    $content = strtolower(strip_tags($content));

    // Flatten line breaks, tabs, stray angle brackets, quotes, dashes and dots
    // to blanks. str_replace() applies the list in order, so "\r\n" is handled
    // before the single "\r" and "\n".
    $content = str_replace(
        array("\r\n", "\r", "\n", "\t", "<", ">", '"', "'", "-", "."),
        " ",
        $content
    );

    // The phrase is data, not a pattern: escape regex metacharacters (earlier
    // captured words may contain "?" or "!") and the "/" delimiter.
    $quoted = preg_quote($phrase, '/');

    // \p{L} with the /u flag accepts letters of any script. It stands in for a
    // literal list of accented characters that had been destroyed by a charset
    // conversion (presumably German umlauts - not verifiable from the file).
    $found = preg_match_all('/'.$quoted.' ([0-9a-zA-Z\p{L}\?!]+)/u', $content, $m);
    if ($found === false){
        // /u fails on input that is not valid UTF-8; retry byte-wise, ASCII only
        $found = preg_match_all('/'.$quoted.' ([0-9a-zA-Z\?!]+)/', $content, $m);
    }
    if (!$found || !isset($m[1])){
        return '';
    }

    // rank the candidate words by how often they follow the phrase
    $next_word = array_count_values($m[1]);
    arsort($next_word);
    $next_word = array_keys($next_word);

    // pick randomly between the two most frequent candidates
    $r = rand(0,1);
    if (isset($next_word[$r])){
        // cast: array keys made of digits come back as integers
        return (string) $next_word[$r];
    }
    if (isset($next_word[0])){
        return (string) $next_word[0];
    }
    return '';
}
/**
 * Print $start_words, then repeatedly ask get_word() for a follow-up word,
 * printing each one and appending it to the running text.
 *
 * Stops after $text_length words, or earlier as soon as get_word() finds no
 * follow-up word.
 *
 * @param string $start_words   words the text starts with
 * @param int    $text_length   maximum number of words to append
 * @param int    $search_length passed through to get_word()
 */
function google_texter($start_words, $text_length, $search_length){
    $word = $start_words;
    print $word;
    for ($x=0; $x < $text_length; $x++){
        $w = get_word($word, $search_length);
        if ((string) $w === ''){
            // Dead end. Appending a blank here would put an empty token into
            // the next search phrase and keep sending requests that cannot
            // match, so stop instead.
            break;
        }
        $word .= ' ' . $w;
        print ' ' . $w;
    }
}
<b>Example output:</b><br/><br/><b>php is</b> ... not the answer by blue october on the south beach diet is not a business associate agreement is required to use open source<br/><br/><b>java is</b> ... not my favorite language for many system management tasks for the communicative signals this worksheet to the students<br/><br/><b>google says</b> ... it has no interest in the public sector is a huge mistake by the lake is approximately 10 minutes from the ordinary general shareholders
/*
** Google texter - v0.2
** This experimental script creates a random text by
** using google search as source.
**
** Warning: Do not execute this script on a webserver!
** This script was made to execute from a command line.
**
** (c) Jonas John, <www.jonasjohn.de>
** License: BSD (http://en.wikipedia.org/wiki/BSD_licenses)
*/
//
// Configuration:
//
// start words to search for:
$words = 'php is';
// stop the script after X generated words (one search request per word)
$text_length = 100;
// number of trailing words of the text that form each search phrase
$search_length = 3;
// number of results requested per search (sent as the "num" query parameter)
$google_pages = 50;
// use this google server
$google_server = 'www.google.com';
// The script sends up to $text_length HTTP requests in a row and is meant
// for the command line only, so refuse to run under a web server SAPI.
if (!in_array(PHP_SAPI, array('cli', 'phpdbg'), true)){
    exit('This script must be run from the command line.');
}
// start the search process
google_texter($words, $text_length, $search_length);
/**
 * Download a URL with cURL and hand back the response.
 *
 * @param string $url address to fetch
 * @return mixed the result of curl_exec(): the response body as a string
 *               (CURLOPT_RETURNTRANSFER is enabled), or false on failure
 */
function get_url($url){
    $handle = curl_init();

    // All transfer settings in one place, applied in the listed order.
    curl_setopt_array($handle, array(
        // address to download
        CURLOPT_URL            => $url,
        // pretend to be a desktop browser
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.0.4)",
        // 0 = leave the response headers out of the returned data
        CURLOPT_HEADER         => 0,
        // true = return the data instead of printing it
        CURLOPT_RETURNTRANSFER => true,
        // give up after this many seconds
        CURLOPT_TIMEOUT        => 10,
    ));

    // perform the transfer, then release the handle
    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
/**
 * Search for the phrase made of the last $search_length words of $words and
 * return one of the words that most often follows that phrase in the
 * downloaded result page.
 *
 * Reads the globals $google_pages (sent as the "num" query parameter) and
 * $google_server (host name used in the search URL).
 *
 * @param string $words         text generated so far, words separated by blanks
 * @param int    $search_length how many trailing words form the search phrase
 * @return string the chosen follow-up word, or '' when none was found
 */
function get_word($words, $search_length){
    global $google_pages;
    global $google_server;

    // Split on whitespace runs and drop empty tokens, so a trailing or doubled
    // blank in $words cannot end up inside the search phrase.
    $tokens = preg_split('/\s+/', (string) $words, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false){
        $tokens = array();
    }

    // keep only the last $search_length words, lower-cased like the page text below
    $phrase = strtolower(implode(' ', array_slice($tokens, ($search_length * -1))));

    // exact-phrase search: the phrase is wrapped in %22 (double quotes)
    $url = 'http://'.$google_server.'/';
    $url .= 'search?num='.$google_pages.'&';
    $url .= 'q=%22'.urlencode($phrase).'%22&';
    $url .= 'btnG=Search';

    $content = get_url($url);
    if (!is_string($content) || $content === ''){
        // download failed or came back empty: nothing to mine
        return '';
    }

    $content = strtolower(strip_tags($content));

    // Flatten line breaks, tabs, stray angle brackets, quotes, dashes and dots
    // to blanks. str_replace() applies the list in order, so "\r\n" is handled
    // before the single "\r" and "\n".
    $content = str_replace(
        array("\r\n", "\r", "\n", "\t", "<", ">", '"', "'", "-", "."),
        " ",
        $content
    );

    // The phrase is data, not a pattern: escape regex metacharacters (earlier
    // captured words may contain "?" or "!") and the "/" delimiter.
    $quoted = preg_quote($phrase, '/');

    // \p{L} with the /u flag accepts letters of any script. It stands in for a
    // literal list of accented characters that had been destroyed by a charset
    // conversion (presumably German umlauts - not verifiable from the file).
    $found = preg_match_all('/'.$quoted.' ([0-9a-zA-Z\p{L}\?!]+)/u', $content, $m);
    if ($found === false){
        // /u fails on input that is not valid UTF-8; retry byte-wise, ASCII only
        $found = preg_match_all('/'.$quoted.' ([0-9a-zA-Z\?!]+)/', $content, $m);
    }
    if (!$found || !isset($m[1])){
        return '';
    }

    // rank the candidate words by how often they follow the phrase
    $next_word = array_count_values($m[1]);
    arsort($next_word);
    $next_word = array_keys($next_word);

    // pick randomly between the two most frequent candidates
    $r = rand(0,1);
    if (isset($next_word[$r])){
        // cast: array keys made of digits come back as integers
        return (string) $next_word[$r];
    }
    if (isset($next_word[0])){
        return (string) $next_word[0];
    }
    return '';
}
/**
 * Print $start_words, then repeatedly ask get_word() for a follow-up word,
 * printing each one and appending it to the running text.
 *
 * Stops after $text_length words, or earlier as soon as get_word() finds no
 * follow-up word.
 *
 * @param string $start_words   words the text starts with
 * @param int    $text_length   maximum number of words to append
 * @param int    $search_length passed through to get_word()
 */
function google_texter($start_words, $text_length, $search_length){
    $word = $start_words;
    print $word;
    for ($x=0; $x < $text_length; $x++){
        $w = get_word($word, $search_length);
        if ((string) $w === ''){
            // Dead end. Appending a blank here would put an empty token into
            // the next search phrase and keep sending requests that cannot
            // match, so stop instead.
            break;
        }
        $word .= ' ' . $w;
        print ' ' . $w;
    }
}
<b>Example output:</b><br/><br/><b>php is</b> ... not the answer by blue october on the south beach diet is not a business associate agreement is required to use open source<br/><br/><b>java is</b> ... not my favorite language for many system management tasks for the communicative signals this worksheet to the students<br/><br/><b>google says</b> ... it has no interest in the public sector is a huge mistake by the lake is approximately 10 minutes from the ordinary general shareholders
0 comments:
Post a Comment