| 
<?php/****************************************************************
 * [Forker]
 *
 * Example: MapReduce example counts the appearance of each
 word in a set of documents
 
 * Usage  : php examples/MapReduce/demo.map-reduce.php > test-mp
 * Storage: FileStorage
 ****************************************************************/
 require 'vendor/autoload.php';
 
 use Forker\Forker;
 use Forker\Storage\FileStorage;
 
 $myResult = 0;
 $myTasks = array(
 'quijote-1.txt',
 'quijote-2.txt',
 'quijote-3.txt',
 );
 
 $numberOfSubTasks = 3;
 
 $forker = new Forker(new FileStorage, $myTasks, $numberOfSubTasks);
 $path   = dirname(__FILE__);
 
 // MAP
 $forker->fork(function($key, $fileName, $emit) use($path){
 
 $file_to_get = "$path/$fileName";
 $content     = file_get_contents($file_to_get);
 
 foreach(getUTF8Words($content) as $word) {
 $emit($word, 1);
 }
 
 });
 
 // REDUCE
 $mapped = $forker->fetch();
 
 // We dont set here the number of sub tasks,
 // since we don't know the total number
 $forker = new Forker(new FileStorage('/tmp/reduced-words'), $mapped);
 
 $forker->fork(function($word, $counts, $emit) {
 $emit($word, is_array($counts) ? count($counts) : 1);
 });
 
 $allWords = $forker->fetch();
 
 arsort($allWords, SORT_NUMERIC);
 
 // First 10 words most used :)
 $cont = 10;
 
 foreach($allWords as $word => $counts) {
 echo $word . " (". $counts .")\n";
 if (! --$cont) break;
 }
 
 //////////////////////////////////////////////////////////
 function getUTF8Words($text)
 {
 $match_arr = array();
 
 //http://stackoverflow.com/questions/10684183/extract-words-from-string-with-preg-match-all
 if(preg_match_all('/([a-zA-Z]|\xC3[\x80-\x96\x98-\xB6\xB8-\xBF]|\xC5[\x92\x93\xA0\xA1\xB8\xBD\xBE]){3,}/', $text, $match_arr)) {
 return $match_arr[0];
 }
 return array();
 }
 |