Wikipedista:DaBlerBot/src: Porovnání verzí

Smazaný obsah Přidaný obsah
Bez shrnutí editace
m aktualizace
Řádek 3:
<?php
/*
PHP-WikiBot for PHP
 
VERSION: 20092010-0710-1506
 
AUTHOR: [[cs:User:DaBler]]
Řádek 19:
* odstranit prázdné odrážky
* z "; term" odstranit tučné a italiku
* nahradit [http://cs.wikipedia.org/wiki/...] interními odkazy
*/
 
Řádek 26 ⟶ 27:
private $user;
private $pass;
 
private $token;
private $user_agent;
 
private static $dict = array();
Řádek 32 ⟶ 36:
const SLEEP = 5;
 
/**
 * Remembers the connection settings of the bot.
 *
 * @param string $base URL of the wiki's index.php entry point.
 * @param string $user Account name the bot logs in with.
 * @param string $pass Password of that account.
 */
public function __construct($base='http://cs.wikipedia.org/w/index.php', $user='DaBlerBot', $pass='výchozí heslo')
{
    // Identification string handed to cURL as the User-Agent.
    $this->user_agent = 'WikiBot for PHP (bot=cs:User:DaBlerBot, operator=cs:User:DaBler)';

    // Starts empty; getToken() fills it in.
    $this->token = '';

    // Caller-supplied (or default) credentials and endpoint.
    $this->pass = $pass;
    $this->user = $user;
    $this->base = $base;
}
 
Řádek 90 ⟶ 96:
// Saving the page failed: raise an exception instead of carrying on.
// (The former "return FALSE" after the throw could never execute and was dropped.)
if(FALSE === $this->post($page, $name))
{
    throw new Exception('Post fails, exiting...!');
}
}
Řádek 104 ⟶ 109:
 
echo "Done.\n";
}
 
/**
 * Downloads the login form and extracts the login token from it.
 *
 * Requests "{base}?title=Special:Userlogin", writes received cookies to
 * self::COOKIEFILE and searches the response for an <input> named
 * "wpLoginToken". When found, its value is stored in $this->token.
 *
 * @return bool TRUE when a token was found and stored, FALSE when the
 *              request failed or the response contained no token field.
 */
private function getToken()
{
    $url = "{$this->base}?title=Special:Userlogin";

    $c = curl_init();
    curl_setopt($c, CURLOPT_URL, $url);
    curl_setopt($c, CURLOPT_COOKIEJAR, self::COOKIEFILE);
    curl_setopt($c, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    // Each option is set exactly once; the user agent comes from the constructor.
    curl_setopt($c, CURLOPT_USERAGENT, $this->user_agent);

    $r = curl_exec($c);

    if( FALSE === $r)
        return FALSE;

    // The token is the value of the hidden wpLoginToken form field.
    $pattern = '/<input .*name="wpLoginToken" .*value="([0-9a-z]+)".*>/';
    $matches = array();
    if( 1 !== preg_match($pattern, $r, $matches))
        return FALSE;

    $this->token = $matches[1];

    return TRUE;
}
 
/**
 * Logs the bot in through the Special:Userlogin form.
 *
 * First calls getToken() to obtain a login token, then POSTs the user name,
 * password and token to "{base}?title=Special:Userlogin&action=submitlogin&type=login".
 * Cookies are both read from and written to self::COOKIEFILE.
 *
 * @return bool TRUE when the response is an empty string or contains
 *              'var wgUserName = "<user>";', FALSE otherwise (including a
 *              failed request).
 */
private function login()
{
    // The result is intentionally not checked: the login is still attempted
    // with whatever value $this->token holds.
    $this->getToken();

    $user = urlencode($this->user);
    $pass = urlencode($this->pass);
    $token = urlencode($this->token);

    $url = "{$this->base}?title=Special:Userlogin&action=submitlogin&type=login";

    $c = curl_init();
    curl_setopt($c, CURLOPT_URL, $url);
    curl_setopt($c, CURLOPT_COOKIEJAR, self::COOKIEFILE);
    // Send back the cookies stored while fetching the token.
    curl_setopt($c, CURLOPT_COOKIEFILE, self::COOKIEFILE);
    curl_setopt($c, CURLOPT_POST, TRUE);
    curl_setopt($c, CURLOPT_POSTFIELDS, "wpName={$user}&wpLoginAttempt=Log+in&wpPassword={$pass}&wpLoginToken={$token}" );
    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_USERAGENT, $this->user_agent);

    $r = curl_exec($c);

    if( FALSE === $r)
        return FALSE;

    // NOTE(review): an empty body is treated as success - presumably the
    // answer to a successful login can be a bare redirect; confirm.
    return $r === "" || FALSE !== strpos($r, "var wgUserName = \"{$this->user}\";");
}
 
Řádek 132 ⟶ 181:
curl_setopt($c, CURLOPT_COOKIEFILE, self::COOKIEFILE);
curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($c, CURLOPT_USERAGENT, $this->user_agent);
$r = curl_exec($c);
 
Řádek 167 ⟶ 217:
// '''
$str = preg_replace('/(.*?)\'\'\'(.*?)\'\'\'(.*?)/m', '\1\2\3', $str);
 
// ''
$str = preg_replace('/(.*?)\'\'(.*?)\'\'(.*?)/m', '\1\2\3', $str);
 
// [[|]]
$str = preg_replace('/(.*?)\[\[(([^\]]*?)\|)?(.*?)\]\](.*?)/m', '\1\4\5', $str);
 
// :
$str = preg_replace('/^ *(.*?)( *:?)* *$/', '\1', $str);
Řádek 192 ⟶ 245:
$str = preg_replace('/^\n*(=+ .+? =+)$/m', "\n\\1", $str, -1, $tmp);
 
return !($old !== $str);
}
 
Řádek 202 ⟶ 255:
$str = preg_replace('/\[\[ *(:?) *Category *: *(.+?) *\]\]/im', '[[\1Kategorie:\2]]', $str, -1, $tmp);
 
return !($old !== $str);
}
 
Řádek 223 ⟶ 276:
$str = preg_replace('|^([\*#;:]+) (.*?) *<br */?'.'>\r?$|m', '\1 \2', $str, -1, $tmp);
 
return !($old !== $str);
}
 
Řádek 235 ⟶ 288:
$str = preg_replace('/&hellip;/m', '…', $str, -1, $tmp);
 
return !($old !== $str);
}
 
Řádek 508 ⟶ 561:
'Ghz' => 'GHz',
);
 
//jednorázově
$replace += array( 'aktualní verze' => 'aktuální verze' );
 
self::$dict = $replace;
Řádek 517 ⟶ 573:
 
$sep = '[ ,\.;*#\-–—…&\(\)\/\[\]|\'{}=:<>?!"„“\n]';
//"
 
$replace = self::$dict;
Řádek 528 ⟶ 585:
$str = preg_replace('|\[http://http://([^\]]+?) *\]|m', '[http://\1]', $str);
 
return !($old !== $str);
}
 
Řádek 565 ⟶ 622:
$title = rawurlencode(str_replace(' ', '_', $name));
 
// Copy every page attribute into a fresh plain array, one entry at a time.
// NOTE(review): presumably $page['attr'] is not a plain array, hence the loop - confirm.
$mp = array();
foreach($page['attr'] as $aname=>$val)
    $mp[$aname] = $val;
 
$url = "{$this->base}?title=${title}&action=submit";
Řádek 576 ⟶ 635:
curl_setopt($c, CURLOPT_HTTPHEADER, array('Expect:'));
curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($c, CURLOPT_USERAGENT, $this->user_agent);
$ret = curl_exec($c);
 
// TRUE only when the response body is exactly the empty string.
return '' === $ret;
}
 
Řádek 586 ⟶ 649:
$bot->get_dictionary();
$bot->run();
 
?>
</source>