derUwe.de ist eine private Seite |
Impressum
erzeugt mit Wordpress |
benutzt ein modifiziertes Snowblind Theme
PHP-Browser ist eine in PHP realisierte Klasse, die einen Web-Browser simuliert. (Genau genommen sind es zwei Klassen.) Damit ist es einfach möglich, HEAD-, GET- und POST-Requests auszuführen. Bei POST-Requests werden auch Fileuploads unterstützt.
Die Anwendungsbereiche sind vielfältig, vom Auswerten der HTTP-Header bestimmter Server bis hin zu automatisierten Up- und Downloads.
Das Script hat natürlich keinen Anspruch auf Perfektion. Es soll nur als einfaches Beispiel dienen. Getestet wurde es zuletzt unter PHP 4.3.10.
Features
- HTTP 1.0
- HEAD-, GET-, POST-Requests
- HTTP-Basic Authentifikation
- unterstützt HTTP-Proxys
- (multiple) Fileuploads mit POST
- Cookies auslesen und senden
- einfache Manipulation der typischen Header
- Hinzufügen von eigenen Headern
- aktuell kein Support für https (ssl) und teilweise Probleme mit utf8
Zur Zeit ist die Proxy-Authentifizierung noch nicht realisiert. Downloads werden in einer Variablen gehalten und nicht direkt auf die Festplatte geschrieben, was die Größe der Downloads abhängig von der Konfiguration des maximalen Speichers für PHP-Scripte macht.
Dokumentation
Momentan keine vorhanden. Selbst ist der Mann (oder die Frau).
Hier jedoch ein paar Bemerkungen zur Funktionsweise. Die Klasse browser() parst alle übergebenen Variablen. Dann muß mit der Funktion site() des Browser-Objektes ein site()-Objekt unter Angabe der URL erzeugt werden. Über den Aufruf der Funktionen head(), get() oder post() des site-Objektes wird der Request ausgeführt und das site()-Objekt enthält die zurückgelieferten Daten, die es teilweise schon parst. In gewisser Weise ist das Prinzip mit einem normalen Webbrowser und dessen verschiedenen Fenstern oder Tabs vergleichbar.
Copyright
Freigegeben unter GPL
Wenn möglich bittet der Autor um eine Verlinkung seiner (dieser) Homepage. Bitte direkt die Domain und keine Deep-Links.
Beispiele
Beispiel einer Grundkonfiguration
// Basic configuration: identify as a desktop Firefox with German locale
// preferences. The setters store exactly the same properties that could
// also be assigned directly.
$browser = new browser();
$browser->set_user_agent("Mozilla/5.0 (X11; U; Linux i686; de-DE; rv:1.7.6) Gecko/20050309 Firefox/1.0.1");
$browser->set_language("de-de,de;q=0.5");
$browser->set_charset("ISO-8859-1,utf-8;q=0.7,*;q=0.7");
$browser->set_accept("text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5");
eine Seite per GET über den Proxy holen
// Fetch a page with GET, routed through a local HTTP proxy.
$client = new browser();
$client->set_proxy("http://localhost:8080/");
$page = $client->site("http://example.com");
$page->get();
// Print only the response body (headers are stripped by get_content()).
$body = $page->get_content();
echo $body;
HEAD einer Seite mit HTTP-Authentifikation auf Port 8080
// HEAD request against port 8080; the credentials embedded in the URL are
// sent as HTTP Basic authentication.
$client = new browser();
$page = $client->site("http://user:pass@www.example.com:8080/");
$page->head();
// Print the raw response header block.
$raw_header = $page->get_header();
echo $raw_header;
Senden einer POST-Anfrage
// Send two form fields with a POST request and print the response body.
$form_fields = array(
    "foo"  => "bar",
    "foo2" => "bar2"
);
$client = new browser();
$page = $client->site("http://www.example.com");
$page->post($form_fields);
echo $page->get_content();
Senden einer POST-Anfrage incl. Senden einer Datei
// Send form fields together with one uploaded file in a single POST.
$form_fields = array(
    "foo"  => "bar",
    "foo2" => "bar2"
);
// One entry per upload: local path, form field name and MIME type.
$uploads = array(
    array("file" => "/tmp/my.jpg", "name" => "file", "type" => "image/jpeg")
);
$client = new browser();
$page = $client->site("http://www.example.com");
$page->post($form_fields, $uploads);
echo $page->get_content();
eine Datei via POST absenden
// Upload a single file via POST without any additional form fields
// (the first argument is false, so no fields are sent).
$uploads = array(
    array("file" => "/tmp/upload.txt", "name" => "file", "type" => "text/html")
);
$client = new browser();
$page = $client->site("http://www.example.com");
$page->post(false, $uploads);
echo $page->get_content();
einfacher Link-Checker
// Minimal link checker: request the URL and print the HTTP status code.
$client = new browser();
$page = $client->site("http://www.example.com/link.htm");
$page->get();
$status = $page->get_status();
echo $status;
Cookies lesen und beim zweiten Request diese wieder senden
// Read the cookies of a first response and send them with a second request.
$browser = new browser();
$site = $browser->site("http://www.example.com");
$site->get();
$cookies_get = $site->get_cookies();
// (Simplified) conversion of the received cookies into the format expected
// by the browser object: array("name" => "value", ...).
// get_cookies() returns false when the response set no cookies, so the
// target array is initialised first and the loop is guarded.
$cookies_send = array();
if (is_array($cookies_get)) {
    foreach ($cookies_get as $cookie) {
        $cookies_send[$cookie['name']] = $cookie['value'];
    }
}
// New browser: set the cookies and request another page with them.
$browser = new browser();
$browser->cookies = $cookies_send;
$site = $browser->site("http://www.example.com/with_cookies");
$site->get();
echo $site->get_header();
Debug – Sendet nichts, gibt nur die Header aus, die sonst gesendet würden
// Debug mode: nothing is sent; the request that would have been sent is
// printed instead. The flag must be set on the browser object before
// site() is called, because site() copies it into the new site object.
$browser = new browser();
$browser->debug = true;
$site = $browser->site("http://www.example.com/link.htm");
$site->get();
Download
Version 1.17
/**
 * Holds the request settings (headers, cookies, proxy, debug flag) of a
 * simulated web browser and creates site objects that carry out the actual
 * HTTP requests with those settings.
 *
 * The class is written so that it works with both constructor conventions:
 * the class-named method browser() (PHP 4) and __construct() (PHP 5 and
 * later; the only form recognised since PHP 8).
 */
class browser
{
    // value of the User-Agent header, false to omit the header
    var $user_agent;
    // value of the Accept header, false to omit the header
    var $accept;
    // value of the Accept-Language header, false to omit the header
    var $language;
    // value of the Accept-Charset header, false to omit the header
    var $charset;
    // value of the Referer header, false to omit the header
    var $referer;
    // cookies to send as array("name" => "value"), or false
    var $cookies;
    // additional raw header lines as array("Key: value", ...), or false
    var $extra_headers;
    // proxy URL (http://user:pass@server:port), or false for direct requests
    var $proxy;
    // if true, requests are printed instead of being sent
    var $debug;

    /**
     * Constructor for PHP 5 and later. Declared before browser() so that
     * the engine registers this method as the constructor.
     */
    function __construct()
    {
        $this->browser();
    }

    /**
     * PHP 4 constructor; also called by reset(). Sets every setting to its
     * default value. All defaults can be overridden afterwards.
     */
    function browser()
    {
        $this->user_agent    = 'PHP-Browser/1.0';
        $this->accept        = '*/*';
        $this->language      = 'de';
        $this->charset       = 'ISO-8859-1';
        $this->referer       = 'http://www.deruwe.de/';
        $this->cookies       = false;
        $this->extra_headers = false;
        $this->proxy         = false;
        $this->debug         = false;
    }

    /**
     * Set the User-Agent header value.
     *
     * @param string|false $user_agent header value, false to omit the header
     */
    function set_user_agent($user_agent = false)
    {
        $this->user_agent = $user_agent;
    }

    /**
     * Set the Accept header value (accepted content types).
     *
     * @param string|false $accept header value, false to omit the header
     */
    function set_accept($accept = false)
    {
        $this->accept = $accept;
    }

    /**
     * Set the Accept-Language header value.
     *
     * @param string|false $language header value, false to omit the header
     */
    function set_language($language = false)
    {
        $this->language = $language;
    }

    /**
     * Set the Accept-Charset header value.
     *
     * @param string|false $charset header value, false to omit the header
     */
    function set_charset($charset = false)
    {
        $this->charset = $charset;
    }

    /**
     * Set the Referer header value.
     *
     * @param string|false $referer header value, false to omit the header
     */
    function set_referer($referer = false)
    {
        $this->referer = $referer;
    }

    /**
     * Set the cookies to send with every request.
     *
     * @param array|false $cookies format: array("key1"=>"value1","key2"=>"value2")
     */
    function set_cookies($cookies = false)
    {
        $this->cookies = $cookies;
    }

    /**
     * Set additional raw header lines.
     *
     * @param array|false $extra_headers format: array("key1: value","key2: value")
     */
    function set_extra_headers($extra_headers = false)
    {
        $this->extra_headers = $extra_headers;
    }

    /**
     * Set the proxy; when set, requests are sent to the proxy.
     *
     * @param string|false $proxy format: http://user:pass@server:port
     */
    function set_proxy($proxy = false)
    {
        $this->proxy = $proxy;
    }

    /**
     * Enable or disable debug mode. In debug mode no socket is opened and
     * the request is printed instead of being sent.
     *
     * @param bool $debug
     */
    function set_debug($debug = false)
    {
        $this->debug = $debug;
    }

    /**
     * Reset all settings to their defaults.
     */
    function reset()
    {
        $this->browser();
    }

    /**
     * Create a site object for the given URL that carries a copy of the
     * current settings. Later changes to this browser object do not affect
     * site objects that were already created.
     *
     * @param string $url URL to request
     * @return site
     */
    function site($url)
    {
        return new site(
            $url,
            $this->user_agent,
            $this->accept,
            $this->language,
            $this->charset,
            $this->referer,
            $this->cookies,
            $this->extra_headers,
            $this->debug,
            $this->proxy
        );
    }
}
/**
 * Represents one URL together with the request settings copied from the
 * browser object. head(), get() and post() send an HTTP/1.0 request
 * (directly or via an HTTP proxy) and store the raw response; the get_*()
 * methods extract information from that stored response.
 *
 * The code avoids syntax newer than PHP 4 but also runs on current PHP
 * versions (no split(), constructor available as __construct()).
 */
class site
{
    var $url;
    var $user_agent;
    var $accept;
    var $language;
    var $charset;
    var $referer;
    var $cookies;
    var $extra_headers;
    var $proxy;
    var $debug;
    var $scheme;
    var $host;
    var $port;
    var $path;
    var $user;
    var $pass;
    var $proxy_host;
    var $proxy_port;
    var $proxy_user;
    var $proxy_pass;
    // private: open socket resource (unused in debug mode)
    var $socket;
    // private: raw response (headers and body), false in debug mode
    var $data;

    /**
     * Constructor for PHP 5 and later. Stores the settings and splits the
     * target URL and the optional proxy URL into their components.
     *
     * @param string       $url           target URL, may contain user:pass@ and a port
     * @param string|false $user_agent    User-Agent header value
     * @param string|false $accept        Accept header value
     * @param string|false $language      Accept-Language header value
     * @param string|false $charset       Accept-Charset header value
     * @param string|false $referer       Referer header value
     * @param array|false  $cookies       array("name" => "value")
     * @param array|false  $extra_headers array("Key: value", ...)
     * @param bool         $debug         print requests instead of sending them
     * @param string|false $proxy         proxy URL or false
     */
    function __construct($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy)
    {
        $this->url = $url;
        $this->user_agent = $user_agent;
        $this->accept = $accept;
        $this->language = $language;
        $this->charset = $charset;
        $this->referer = $referer;
        $this->cookies = $cookies;
        $this->extra_headers = $extra_headers;
        $this->proxy = $proxy;
        $this->debug = $debug;
        $this->scheme = 'http';
        $this->host = false;
        $this->port = 80;
        $this->path = '/';
        $this->user = false;
        $this->pass = false;
        $this->proxy_host = false;
        $this->proxy_port = false;
        $this->proxy_user = false;
        $this->proxy_pass = false;
        $this->socket = false;
        $this->data = false;

        // parse the target URL; every component is optional in the result
        // of parse_url(), so each key is checked before it is read
        $url_parts = parse_url($this->url);
        if ( ! is_array($url_parts) ) {
            $url_parts = array();
        }
        if ( ! empty($url_parts['host']) ) {
            $this->host = $url_parts['host'];
        }
        if ( ! empty($url_parts['scheme']) ) {
            $this->scheme = $url_parts['scheme'];
        }
        if ( ! empty($url_parts['port']) ) {
            $this->port = $url_parts['port'];
        }
        if ( isset($url_parts['path']) && $url_parts['path'] !== '' ) {
            $this->path = $url_parts['path'];
        }
        // the query string is sent as part of the request path
        if ( isset($url_parts['query']) && $url_parts['query'] !== '' ) {
            $this->path = $this->path.'?'.$url_parts['query'];
        }
        // credentials for HTTP Basic authentication
        if ( isset($url_parts['user']) && $url_parts['user'] !== '' ) {
            $this->user = $url_parts['user'];
            $this->pass = isset($url_parts['pass']) ? $url_parts['pass'] : "";
        }

        // parse the proxy URL
        if ($this->proxy) {
            $proxy_parts = parse_url($this->proxy);
            if ( ! is_array($proxy_parts) ) {
                $proxy_parts = array();
            }
            if ( ! empty($proxy_parts['host']) ) {
                $this->proxy_host = $proxy_parts['host'];
            }
            // 8080 is this class's default proxy port
            $this->proxy_port = ! empty($proxy_parts['port']) ? $proxy_parts['port'] : 8080;
            // proxy credentials are stored but not sent yet
            if ( isset($proxy_parts['user']) && $proxy_parts['user'] !== '' ) {
                $this->proxy_user = $proxy_parts['user'];
                $this->proxy_pass = isset($proxy_parts['pass']) ? $proxy_parts['pass'] : "";
            }
        }
    }

    /**
     * PHP 4 constructor. On newer PHP versions it is an ordinary method
     * that re-initialises the object.
     */
    function site($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy)
    {
        $this->__construct($url,$user_agent,$accept,$language,$charset,$referer,$cookies,$extra_headers,$debug,$proxy);
    }

    /**
     * GET the site.
     *
     * @return bool false if the connection could not be opened
     */
    function get()
    {
        return $this->_request_without_body('GET');
    }

    /**
     * HEAD the site.
     *
     * @return bool false if the connection could not be opened
     */
    function head()
    {
        return $this->_request_without_body('HEAD');
    }

    /**
     * POST to the site. If fields or files are given, the body is sent as
     * multipart/form-data; otherwise an empty body is sent.
     *
     * All files are read into memory before the request is sent so that
     * the Content-Length header matches the body exactly. Files that do
     * not exist or are not readable are skipped with an E_USER_WARNING.
     *
     * @param array|false $data_to_send  format: array("key1"=>"value1","key2"=>"value2")
     * @param array|false $files_to_send format: array(array("name"=>"file1","file"=>"/filename","type"=>"text/html","rename"=>"(optional) newfilename"),array(...))
     * @return bool false if the connection could not be opened
     */
    function post($data_to_send = false,$files_to_send = false)
    {
        if ( ! is_array($data_to_send) ) {
            $data_to_send = array();
        }
        if ( ! is_array($files_to_send) ) {
            $files_to_send = array();
        }

        $boundary = "---------------------------".substr(md5(uniqid(rand(), true)),0,10);
        $body_parts = array();

        foreach ($data_to_send as $key => $val) {
            $body_parts[] = '--'.$boundary."\r\n"
                .'Content-Disposition: form-data; name="'.$key.'"'."\r\n\r\n"
                .$val."\r\n";
        }
        foreach ($files_to_send as $file) {
            if ( ( ! is_array($file)) || empty($file['file']) ) {
                continue;
            }
            if ( ( ! is_file($file['file'])) || ( ! is_readable($file['file'])) ) {
                trigger_error('site::post(): cannot read upload file '.$file['file'], E_USER_WARNING);
                continue;
            }
            $content = file_get_contents($file['file']);
            if ($content === false) {
                $content = '';
            }
            $field_name = isset($file['name']) ? $file['name'] : '';
            $mime_type = ! empty($file['type']) ? $file['type'] : 'application/octet-stream';
            // "rename" optionally overrides the file name reported to the server
            $send_as = ! empty($file['rename']) ? $file['rename'] : basename($file['file']);
            $body_parts[] = '--'.$boundary."\r\n"
                .'Content-Disposition: form-data; name="'.$field_name.'"; filename="'.$send_as.'"'."\r\n"
                .'Content-Type: '.$mime_type."\r\n"
                .'Content-Transfer-Encoding: binary'."\r\n\r\n"
                .$content."\r\n";
        }

        if ( ! $this->_connect() ) {
            return false;
        }

        $header = $this->_build_header('POST');
        if (count($body_parts)) {
            $closing = '--'.$boundary.'--';
            // Content-Length covers every part and the closing delimiter
            $length = strlen($closing);
            foreach ($body_parts as $part) {
                $length += strlen($part);
            }
            $header .= 'Content-Type: multipart/form-data; boundary='.$boundary."\r\n";
            $header .= 'Content-Length: '.strval($length)."\r\n\r\n";
            $this->_fputs($header);
            foreach ($body_parts as $part) {
                $this->_fputs($part);
            }
            $this->_fputs($closing);
        } else {
            // no body: the header block still has to be terminated
            $header .= "Content-Length: 0\r\n\r\n";
            $this->_fputs($header);
        }

        $this->data = $this->_fgets();
        $this->_fclose();
        return true;
    }

    /**
     * Return the HTTP status code of the response.
     *
     * @return string|false three-digit status code as string, false if none was found
     */
    function get_status()
    {
        foreach ($this->get_headers() as $headline) {
            if (preg_match('/HTTP\/(.*) ([0-9][0-9][0-9])(.*)/i',$headline,$tmp)) {
                return $tmp[2];
            }
        }
        return false;
    }

    /**
     * Return the content type of the response.
     *
     * @param bool $full true: complete header value including parameters,
     *                   false: only the part before the first ";"
     * @return string|false false if the header is missing
     */
    function get_type($full = false)
    {
        $type = $this->_find_header('Content-Type');
        if ( ($type === false) || $full ) {
            return $type;
        }
        $pieces = explode(";",$type);
        return $pieces[0];
    }

    /**
     * Return the value of the Content-Length header.
     *
     * @return string|false header value as received (numeric string), false if missing
     */
    function get_length()
    {
        return $this->_find_header('Content-Length');
    }

    /**
     * Return the value of the Location header (redirect target).
     *
     * @return string|false false if the header is missing
     */
    function get_location()
    {
        return $this->_find_header('Location');
    }

    /**
     * Return the cookies set by the response.
     *
     * Attributes are recognised by name, in any order. "time", "path",
     * "domain" and "secure" are only present if the corresponding
     * attribute was sent.
     *
     * @return array|false format: array(array("name"=>"foo","value"=>"bar","path"=>"string","time"=>timestamp,"domain"=>"string","secure"=>boolean),array(...)),
     *                     false if the response set no cookies
     */
    function get_cookies()
    {
        $cookies = false;
        foreach ($this->get_headers() as $headline) {
            if ( ! preg_match('/^Set-Cookie:\s*/i',$headline) ) {
                continue;
            }
            if ( ! is_array($cookies) ) {
                $cookies = array();
            }
            $headline = preg_replace('/^Set-Cookie:\s*/i', '', trim($headline));
            $segments = explode(";",$headline);
            // the first segment is name=value
            $pair = explode("=",array_shift($segments),2);
            $cookieinfo = array();
            $cookieinfo['name'] = $pair[0];
            $cookieinfo['value'] = isset($pair[1]) ? $pair[1] : '';
            foreach ($segments as $segment) {
                $attribute = explode("=",trim($segment),2);
                $attribute_value = isset($attribute[1]) ? trim($attribute[1]) : '';
                switch (strtolower(trim($attribute[0]))) {
                    case 'expires':
                        // expiry date as timestamp
                        $cookieinfo['time'] = strtotime($attribute_value);
                        break;
                    case 'path':
                        $cookieinfo['path'] = $attribute_value;
                        break;
                    case 'domain':
                        $cookieinfo['domain'] = $attribute_value;
                        break;
                    case 'secure':
                        $cookieinfo['secure'] = true;
                        break;
                }
            }
            $cookies[] = $cookieinfo;
        }
        return $cookies;
    }

    /**
     * Return all response header lines.
     *
     * @return array format: array("HTTP/1.1 200 OK","Date: Thu, 27 May 2004 20:33:10 GMT")
     */
    function get_headers()
    {
        return explode("\r\n",$this->get_header());
    }

    /**
     * Return the value of the Date header without the "Date: " prefix,
     * e.g. "Thu, 27 May 2004 20:33:10 GMT".
     *
     * @return string|false false if the header is missing
     */
    function get_date()
    {
        return $this->_find_header('Date');
    }

    /**
     * Return the complete response header block as one string.
     *
     * @return string empty string if there is no response
     */
    function get_header()
    {
        $tmp = explode("\r\n\r\n",(string)$this->data,2);
        return $tmp[0];
    }

    /**
     * Return the response body.
     *
     * @return string empty string if there is no body
     */
    function get_content()
    {
        $tmp = explode("\r\n\r\n",(string)$this->data,2);
        if (isset($tmp[1])) {
            return $tmp[1];
        }
        return "";
    }

    // intern: send a request that has no body (GET, HEAD) and store the response
    function _request_without_body($method)
    {
        if ( ! $this->_connect() ) {
            return false;
        }
        // the empty line terminates the header block
        $this->_fputs($this->_build_header($method)."\r\n");
        $this->data = $this->_fgets();
        $this->_fclose();
        return true;
    }

    // intern: open the connection to the proxy if one is set, otherwise to the target host
    function _connect()
    {
        if ($this->proxy) {
            return $this->_fsockopen($this->proxy_host, $this->proxy_port);
        }
        return $this->_fsockopen($this->host, $this->port);
    }

    // intern: target host, followed by ":port" if the port is not the default port 80
    function _host_with_port()
    {
        if ( $this->port != 80 ) {
            return $this->host.':'.$this->port;
        }
        return $this->host;
    }

    // intern: build request line and headers up to and including "Connection: close",
    // without the terminating empty line so that callers can append further headers
    function _build_header($method)
    {
        if ($this->proxy) {
            // a proxy needs the absolute URL in the request line
            $target = $this->scheme.'://'.$this->_host_with_port().$this->path;
        } else {
            $target = $this->path;
        }
        $header = $method.' '.$target.' HTTP/1.0'."\r\n";
        // Host always names the target server, also when a proxy is used
        $header .= 'Host: '.$this->_host_with_port()."\r\n";
        if ( $this->user ) {
            $header .= 'Authorization: Basic '.base64_encode($this->user.':'.$this->pass)."\r\n";
        }
        if ( $this->language !== false ) {
            $header .= 'Accept-Language: '.$this->language."\r\n";
        }
        if ( $this->charset !== false ) {
            $header .= 'Accept-Charset: '.$this->charset."\r\n";
        }
        if ( $this->user_agent !== false ) {
            $header .= 'User-Agent: '.$this->user_agent."\r\n";
        }
        if ( $this->accept !== false ) {
            $header .= 'Accept: '.$this->accept."\r\n";
        }
        if ( $this->referer !== false ) {
            $header .= 'Referer: '.$this->referer."\r\n";
        }
        if (is_array($this->extra_headers)) {
            foreach ($this->extra_headers as $extra_header) {
                if ( $extra_header ) {
                    $header .= $extra_header."\r\n";
                }
            }
        }
        if (is_array($this->cookies)) {
            $pairs = array();
            foreach ($this->cookies as $var => $value) {
                // cookies with empty name or empty value are not sent
                if ( ($var) && ($value != "") ) {
                    $pairs[] = $var.'='.$value;
                }
            }
            if (count($pairs)) {
                $header .= 'Cookie: '.implode('; ',$pairs)."\r\n";
            }
        }
        $header .= "Connection: close\r\n";
        return $header;
    }

    // intern: value of the first response header with the given name
    // (case-insensitive), false if the header is missing
    function _find_header($name)
    {
        $pattern = '/^'.preg_quote($name,'/').':\s*/i';
        foreach ($this->get_headers() as $headline) {
            if (preg_match($pattern,$headline)) {
                return preg_replace($pattern, '', $headline);
            }
        }
        return false;
    }

    // intern, for debugging: open the socket; in debug mode nothing is opened
    function _fsockopen($host, $port)
    {
        if ($this->debug) {
            return true;
        }
        $this->socket = fsockopen($host, $port);
        return $this->socket ? true : false;
    }

    // intern, for debugging: write to the socket; in debug mode print instead
    function _fputs($data)
    {
        if ( ! $this->debug) {
            fputs($this->socket, $data);
        } else {
            echo $data;
        }
    }

    // intern, for debugging: read the complete response; returns false in debug mode
    function _fgets()
    {
        if ($this->debug) {
            return false;
        }
        $return = '';
        while ( ! feof($this->socket)) {
            $chunk = fgets($this->socket, 512);
            // stop on read errors instead of looping forever
            if ($chunk === false) {
                break;
            }
            $return .= $chunk;
        }
        return $return;
    }

    // intern, for debugging: close the socket (no-op in debug mode)
    function _fclose()
    {
        if ( ! $this->debug) {
            fclose($this->socket);
        }
    }
}
