Quantcast
Channel: Rob's Area for PHP on Windows
Viewing all articles
Browse latest Browse all 7

FindProxyForURL - Parse a PAC (proxy auto config) file in PHP

$
0
0
A PAC file is a JavaScript file that web browsers evaluate to decide which proxy server (if any) to use for a given URL.

Here's a working example of using the php_spidermonkey extension to run a proxy auto config script (javascript) in PHP.




All you need is a PAC file to test it with.  You can write your own by following one of the many guides on the web, for example: http://www.ee.ed.ac.uk/%7Emfg/work/proxy-live.html

There is more information and Windows builds of php_spidermonkey in this article: Javascript in PHP on Windows with php_spidermonkey.  Install the extension dll in PHP in the usual way.

Here is a snippet of code that uses the class 'CProxy' (defined underneath) which executes the Javascript PAC function FindProxyForURL:


Code formatted by http://hilite.me/
use DEMO\PAC\CProxy; // import is only needed when CProxy is kept in its namespace.

/*
 * Usage demo: load a PAC script, then ask it which proxy applies to one URL.
 * Passing true as the second argument returns the raw string produced by the
 * PAC function FindProxyForURL instead of the parsed list of matches.
 *
 * With a PAC file such as:
 *
 *     function FindProxyForURL(url, host)
 *     {
 *         if ( shExpMatch(host, "*.google.*") )
 *         {
 *             return "PROXY 10.20.30.40:8000; DIRECT";
 *         }
 *         return "DIRECT";
 *     }
 *
 * this script prints:
 *
 *     PROXY 10.20.30.40:8000; DIRECT
 */
$resolver = new CProxy();
$resolver->LoadPAC('C:\proxy.pac'); // in practice the PAC location is usually a URL
$rawAnswer = $resolver->FindProxyForAddress('http://www.google.com/', true);
echo $rawAnswer;

Definition of class CProxy - it utilises and demonstrates some of the following:
  1. PHP 5.4 - uses namespaces to show how they work in this scenario, and traits.
  2. php_spidermonkey javascript interpreter
  3. PAC (proxy-auto-config) functions 

<?php
/* This example uses a namespace, just to show how it works inside namespaces.
Namespace can be removed. */

namespace DEMO\PAC;
use \JSContext, \Exception;

// Stop processing this file early when the php_spidermonkey extension is not
// available (detected through its JSVERSION_DEFAULT constant), so the
// definitions below are never reached without a JavaScript engine.
if (!defined('JSVERSION_DEFAULT')) {
    // A separator after __FILE__ keeps the path and the message readable in the log.
    error_log(__FILE__ . ': CProxy not loaded, php_spidermonkey extension not detected!');
    return;
}
 
// global function to escape javascript strings:
/**
 * Escape a PHP string so it can be embedded inside a quoted JavaScript string literal.
 *
 * A backslash is placed in front of backslashes, every character listed in
 * $quotes, and '<' / '>'; line feeds and carriage returns become the two-character
 * sequences \n and \r.
 *
 * @param string $str    Text to escape.
 * @param string $quotes Quote characters to escape (defaults to both ' and ").
 * @return string The escaped text (without surrounding quotes).
 */
function js($str, $quotes = '\'"')
{
    return addcslashes($str, "\\$quotes\n\r<>");
}
 
/**
 * Evaluates a PAC (proxy auto-config) script with the php_spidermonkey
 * JavaScript engine and exposes its FindProxyForURL() function to PHP.
 *
 * The PAC helper functions that the script may call (myIpAddress, shExpMatch,
 * ...) are static methods pulled in from the PACFunctions trait and registered
 * with the JavaScript context in the constructor.
 */
class CProxy
{
    use PACFunctions; // PAC helper functions, defined in a separate trait.

    /** @var string|null Source of the loaded PAC script; empty until LoadPAC() succeeds. */
    private $pac;

    /** @var JSContext JavaScript context the PAC script runs in. */
    private $js;

    /** @var \stdClass Object exposed to the script as the global 'ProxyConfig'. */
    private $proxyconfig;

    /**
     * Create the JavaScript context, register the PAC helper functions and
     * optionally load a PAC script straight away.
     *
     * @param string|null $pacfile          Path or URL of a PAC file; NULL to load one later via LoadPAC().
     * @param bool        $strictJavascript When true, JSOPTION_STRICT and JSOPTION_WERROR are added to the context options.
     */
    public function __construct($pacfile = NULL, $strictJavascript = false)
    {
        // requires php_spidermonkey.dll extension.
        $js = new JSContext();

        if ($strictJavascript) {
            $js->setOptions($js->getOptions() | JSOPTION_STRICT | JSOPTION_WERROR);
        }

        // Each PAC helper is registered under its own name and is backed by
        // the static method of the same name on this class.
        $pacFunctions = array(
            'myIpAddress',
            'isPlainHostName',
            'dnsDomainIs',
            'localHostOrDomainIs',
            'isResolvable',
            'isInNet',
            'dnsResolve',
            'dnsDomainLevels',
            'shExpMatch',
            'weekdayRange',
            'dateRange',
            'timeRange',
        );
        foreach ($pacFunctions as $name) {
            $js->registerFunction(__NAMESPACE__ . '\CProxy::' . $name, $name);
        }

        // I don't know what ProxyConfig is used for:
        $this->proxyconfig = new \stdClass();
        $this->proxyconfig->bindings = array();
        $js->assign('ProxyConfig', $this->proxyconfig);

        $this->js = $js;
        $this->LoadPAC($pacfile);
    }

    /**
     * Ask the loaded PAC script which proxy to use for an address.
     *
     * @param string $address URL (or bare host/path) to look up.
     * @param bool   $raw     When true, return the string produced by FindProxyForURL unchanged.
     * @return array|string|false The parsed entries from ReadPacResult(), the
     *                            raw script result when $raw is true or the
     *                            result is empty, or false when no PAC script
     *                            is loaded, the address has neither host nor
     *                            path, or the script throws.
     */
    public function FindProxyForAddress($address, $raw = false)
    {
        if (!$this->pac) {
            return false;
        }

        $parts = parse_url($address);
        if ($parts === false) {
            return false;
        }

        if (isset($parts['host'])) {
            $host = $parts['host'];
        } elseif (isset($parts['path'])) {
            /* $address is badly formed, not sure if this is right, but
             * take it from the beginning to the first slash. */
            $host = preg_replace('/(.*?)(?>\/.*$|$)/', '$1', $parts['path']);
        } else {
            return false;
        }

        // The PAC contract passes the bare host name as the second argument:
        // the port is not part of it (scripts read it from the URL if needed).
        // Appending ":port" here would defeat rules such as
        // shExpMatch(host, "*.example.com").
        $call = 'FindProxyForURL(\'' . js($address) . '\', \'' . js($host) . '\');';

        try {
            $rv = $this->js->evaluateScript($call, 'FindProxyForURL');
        } catch (Exception $ex) {
            return false;
        }

        if (!$rv || $raw) {
            return $rv;
        }
        return self::ReadPacResult($rv);
    }

    /**
     * Read a PAC script and evaluate it in the JavaScript context.
     *
     * @param string|null $pac Path or URL readable by file_get_contents().
     * @return bool|null true on success; NULL when a script is already loaded;
     *                   false on a missing argument, an uninitialised context,
     *                   an unreadable or empty file, or a script error.
     */
    public function LoadPAC($pac)
    {
        if ($this->pac) {
            return NULL; // already loaded
        }

        if ($pac === NULL || !$this->js) {
            return false; // param error or not initialised properly
        }

        $script = file_get_contents($pac);
        if (!$script) {
            return false; // cannot read PAC file
        }

        try {
            $this->js->evaluateScript($script, $pac);
        } catch (Exception $ex) {
            return false;
        }

        $this->pac = $script;
        return true;
    }

    /**
     * Split a FindProxyForURL result into its individual entries.
     *
     * @param string $pacres Something like "PROXY 1.2.3.4:8080; PROXY 1.2.3.5:8080; DIRECT".
     * @return array|false One element per entry, in preg_match_all()
     *                     PREG_SET_ORDER layout (0 = whole match, 1 = keyword,
     *                     2 = text after the keyword), or false when the input
     *                     is empty or contains no PROXY/SOCKS/DIRECT entry.
     */
    public static function ReadPacResult($pacres)
    {
        if (!$pacres) {
            return false;
        }

        if (!preg_match_all('/(PROXY|SOCKS|DIRECT)\s*([^;]*|.*?$)/ui', $pacres, $matches, PREG_SET_ORDER)) {
            return false;
        }
        return $matches;
    }
}
?>

    PAC functions written as a trait class (remember - not tested very much!):

    Code formatted by http://hilite.me/
    <?php
    /* Define the PAC traits used in class CProxy, above. */

    namespace DEMO\PAC;
    use \DateTime, \DateTimeZone, \Exception;

    /**
     * PHP implementations of the helper functions a PAC (proxy auto-config)
     * script expects to find in its JavaScript environment.
     *
     * Every helper is a public static method so that it can be registered as a
     * callback by the class using this trait.
     */
    trait PACFunctions
    {
        /** @var string|null IP address cached by myIpAddress(). */
        private static $s_myIP;

        /**
         * Best guess at the IP address of this machine.
         *
         * Tries $_SERVER['SERVER_ADDR'] and $_SERVER['LOCAL_ADDR'] first
         * (preferring anything other than 127.0.0.1), then resolves the local
         * host name. The result is cached for later calls.
         *
         * @return string An IP address, or '0.0.0.0' when nothing was found.
         */
        public static function myIpAddress()
        {
            if (self::$s_myIP) {
                return self::$s_myIP;
            }

            $bestip = '0.0.0.0';
            foreach (array('SERVER_ADDR', 'LOCAL_ADDR') as $var) {
                if (empty($_SERVER[$var])) {
                    continue;
                }
                $ip = $_SERVER[$var];
                if ($ip != '127.0.0.1') {
                    return (self::$s_myIP = $ip);
                }
                $bestip = $ip; // loopback: keep as fallback, keep looking
            }

            // gethostbyname() returns its argument unchanged when the lookup fails.
            if (($hn = gethostname()) && ($ip = gethostbyname($hn)) && $ip !== $hn) {
                return (self::$s_myIP = $ip);
            }

            return (self::$s_myIP = $bestip);
        }

        /**
         * @param string $host
         * @return bool true when $host contains no dot.
         */
        public static function isPlainHostName($host)
        {
            return strpos($host, '.') === false;
        }

        /**
         * Match a string against a shell-style pattern.
         *
         * '*' matches any run of characters (including none) and '?' matches
         * exactly one character; everything else is literal. Matching is
         * case-insensitive and anchored at both ends.
         *
         * @param string $host    Text to test (host name or URL).
         * @param string $pattern Shell expression.
         * @return bool
         */
        public static function shExpMatch($host, $pattern)
        {
            $regex = '';
            foreach (explode('*', (string) $pattern) as $index => $literal) {
                if ($index > 0) {
                    $regex .= '.*'; // one wildcard for every '*' that separated two pieces
                }
                // preg_quote() turns '?' into '\?'; map that to "any single character".
                $regex .= str_replace('\?', '.', preg_quote($literal, '/'));
            }
            return preg_match('/^' . $regex . '$/ui', (string) $host) === 1;
        }

        /**
         * @param string $host Host name or dotted IPv4 address.
         * @return bool true when $host is an IPv4 address or resolves to one.
         */
        public static function isResolvable($host)
        {
            if (ip2long($host) !== false) {
                return true;
            }
            $resolved = gethostbyname($host);
            return $resolved && ip2long($resolved) !== false;
        }

        /**
         * Test whether a host lies inside an IPv4 network.
         *
         * @param string $host   Host name or dotted IPv4 address.
         * @param string $ip     Network address pattern, dotted IPv4.
         * @param string $subnet Network mask, dotted IPv4.
         * @return bool false when any of the three cannot be turned into an address.
         */
        public static function isInNet($host, $ip, $subnet)
        {
            $longhost = ip2long($host);
            if ($longhost === false) {
                $longhost = ip2long(gethostbyname($host));
                if ($longhost === false) {
                    return false;
                }
            }

            $longip = ip2long($ip);
            $longsub = ip2long($subnet);
            if ($longip === false || $longsub === false) {
                return false; // an unparsable pattern or mask must not match every host
            }

            // Mask both sides so that a pattern with host bits set still
            // describes its network.
            return ($longhost & $longsub) === ($longip & $longsub);
        }

        /**
         * @param string $host
         * @param string $domain Domain suffix, e.g. ".example.com".
         * @return bool true when $host ends with $domain (case-sensitive).
         */
        public static function dnsDomainIs($host, $domain)
        {
            $host = (string) $host;
            $domain = (string) $domain;
            $len = strlen($domain);
            return $len <= strlen($host) && substr($host, -$len) === $domain;
        }

        /**
         * True when $host is exactly $domain, or when $host is the leading
         * label(s) of $domain (e.g. "www" against "www.example.com").
         * Comparison is case-insensitive.
         *
         * @param string $host   Host name taken from the URL.
         * @param string $domain Fully qualified host name to match against.
         * @return bool
         */
        public static function localHostOrDomainIs($host, $domain)
        {
            $host = (string) $host;
            $domain = (string) $domain;
            return strcasecmp($host, $domain) === 0
                || stripos($domain, $host . '.') === 0;
        }

        /**
         * @param string $host
         * @return string Result of gethostbyname(): the IPv4 address, or $host unchanged on failure.
         */
        public static function dnsResolve($host)
        {
            return gethostbyname($host);
        }

        /**
         * @param string $host
         * @return int Number of dots in $host.
         */
        public static function dnsDomainLevels($host)
        {
            return substr_count($host, '.');
        }

        /**
         * Test whether today falls within a range of weekdays.
         *
         * Day names are matched on their first three letters, case-insensitively
         * (MON .. SUN). A range whose start is later in the week than its end
         * wraps around (e.g. FRI - MON). A trailing 'GMT' argument switches the
         * clock to UTC.
         *
         * @param string      $fr  First day of the range.
         * @param string|null $to  Last day of the range, or 'GMT'; defaults to $fr.
         * @param string|null $gmt 'GMT' or NULL.
         * @return bool false when a day name is not recognised.
         */
        public static function weekdayRange($fr, $to = NULL, $gmt = NULL)
        {
            $days = array('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun');

            list($names, $useGmt) = self::splitGmtFlag(array($fr, $to, $gmt));
            if (!isset($names[0])) {
                return false;
            }
            $first = $names[0];
            $last = isset($names[1]) ? $names[1] : $first;

            $from = array_search(strtolower(substr((string) $first, 0, 3)), $days, true);
            $until = array_search(strtolower(substr((string) $last, 0, 3)), $days, true);
            if ($from === false || $until === false) {
                return false;
            }

            // format('N') is 1 (Monday) .. 7 (Sunday); shift to the 0-based $days index.
            $today = (int) self::pacNow($useGmt)->format('N') - 1;

            if ($from > $until) {
                // wrapped range, e.g. FRI - MON
                return $today >= $from || $today <= $until;
            }
            return $today >= $from && $today <= $until;
        }

        /**
         * Test whether today falls on a date or within a date range.
         *
         * Each date may be given as any leading subset of day, month name and
         * year; the parameter names describe the fully specified form only.
         * Omitted parts are not compared. 'GMT' as the last supplied argument
         * switches the clock to UTC. A range without years whose end sorts
         * before its start wraps around the end of the year.
         *
         * @param int|string      $day1
         * @param int|string|null $month1
         * @param int|string|null $year1
         * @param int|string|null $day2
         * @param int|string|null $month2
         * @param int|string|null $year2
         * @param string|null     $gmt
         * @return bool false also when the arguments cannot be interpreted.
         */
        public static function dateRange($day1, $month1 = NULL, $year1 = NULL, $day2 = NULL, $month2 = NULL, $year2 = NULL, $gmt = NULL)
        {
            // Take 'GMT' out first so it can never be mistaken for a date part,
            // wherever in the list it was supplied.
            list($dates, $useGmt) = self::splitGmtFlag(array($day1, $month1, $year1, $day2, $month2, $year2, $gmt));
            $dates = array_pad($dates, 7, NULL);

            if (!self::rationaliseDateArgs($dates)) {
                return false;
            }
            list($day1, $month1, $year1, $day2, $month2, $year2) = $dates;

            $now = self::pacNow($useGmt);
            $tz = $now->getTimezone();

            // Placeholders for parts that were not supplied. A part missing
            // from the first date is left out of the comparison format, so its
            // placeholder only has to yield a valid date: a leap year, a
            // 31-day month and the 1st never overflow into another month.
            $fillYear = $year1 ? $now->format('Y') : '2000';
            $fillMonth = 'Jan';
            $fillDay = $day1 ? $now->format('d') : '01';

            // The second date may only carry parts the first date has as well.
            if (!$day1) {
                $day2 = 0;
            }
            if (!$month1) {
                $month2 = '';
            }
            if (!$year1) {
                $year2 = 0;
            }

            try {
                // Days are zero-padded so the text is a well-formed "YYYY-Mon-DD" date.
                $from = new DateTime(
                    ($year1 ? $year1 : $fillYear) . '-' . ($month1 ? $month1 : $fillMonth) . '-'
                    . sprintf('%02d', $day1 ? $day1 : $fillDay),
                    $tz
                );
                $to = NULL;
                if ($day2 || $month2 || $year2) {
                    $to = new DateTime(
                        ($year2 ? $year2 : $fillYear) . '-' . ($month2 ? $month2 : $fillMonth) . '-'
                        . sprintf('%02d', $day2 ? $day2 : $fillDay),
                        $tz
                    );
                }
            } catch (Exception $ex) {
                return false; // data error, an arg is incorrect
            }

            // Compare only the parts that were actually supplied.
            $dateformat = ($year1 ? 'Y' : '') . ($month1 ? 'm' : '') . ($day1 ? 'd' : '');
            if ($dateformat === '') {
                return false;
            }

            $test = $now->format($dateformat);
            $lhs = $from->format($dateformat);

            if (!$to) {
                return $test == $lhs;
            }

            $rhs = $to->format($dateformat);
            if ($rhs < $lhs && $dateformat[0] != 'Y') {
                // range without a year that wraps past the end of the year
                return $test >= $lhs || $test <= $rhs;
            }
            return $test >= $lhs && $test <= $rhs;
        }

        /**
         * Test whether the current time falls on an hour or within a time range.
         *
         * Accepted argument counts (not counting an optional trailing 'GMT'):
         *   1: (hour)                    - true during that hour
         *   2: (hour1, hour2)
         *   4: (hour1, min1, hour2, min2)
         *   6: (hour1, min1, sec1, hour2, min2, sec2)
         * The parameter names describe the six-argument form only. A range
         * whose start is later than its end wraps around midnight.
         *
         * @return bool false also for any other argument count or out-of-range values.
         */
        public static function timeRange($hour1, $min1 = NULL, $sec1 = NULL, $hour2 = NULL, $min2 = NULL, $sec2 = NULL, $gmt = NULL)
        {
            list($values, $useGmt) = self::splitGmtFlag(array($hour1, $min1, $sec1, $hour2, $min2, $sec2, $gmt));

            // $ct is the number of leading arguments actually supplied.
            $ct = 0;
            $max = count($values);
            while ($ct < $max && $values[$ct] !== NULL) {
                ++$ct;
            }

            if ($ct == 4) {
                // (hour1, min1, hour2, min2): shift into the six-argument slots
                $min2 = $hour2;
                $hour2 = $sec1;
                $sec1 = $sec2 = 0;
                $fmt = 'Hi';
            } elseif ($ct == 1 || $ct == 2) {
                // (hour) or (hour1, hour2)
                $hour2 = ($ct == 2 ? $min1 : $hour1);
                $min1 = $sec1 = $min2 = $sec2 = 0;
                $fmt = 'H';
            } elseif ($ct == 6) {
                $fmt = 'His';
            } else {
                return false; // parameters not specified properly
            }

            $now = self::pacNow($useGmt);
            $tz = $now->getTimezone();
            $today = $now->format('Y-m-d');

            try {
                // A space must separate the date from the time of day.
                $lhs = new DateTime($today . ' ' . (int) $hour1 . ':' . (int) $min1 . ':' . (int) $sec1, $tz);
                $rhs = ($ct == 1)
                    ? NULL
                    : new DateTime($today . ' ' . (int) $hour2 . ':' . (int) $min2 . ':' . (int) $sec2, $tz);
            } catch (Exception $ex) {
                // datetime not recognised, so parameter probably out of range
                return false;
            }

            $nowtime = $now->format($fmt);
            $ltime = $lhs->format($fmt);
            if ($rhs === NULL) {
                return $nowtime == $ltime;
            }

            $rtime = $rhs->format($fmt);
            if ($ltime > $rtime) {
                // range wraps past midnight
                return $nowtime >= $ltime || $nowtime <= $rtime;
            }
            return $nowtime >= $ltime && $nowtime <= $rtime;
        }

        /**
         * Separate a 'GMT' marker from a PAC argument list.
         *
         * @param array $args Arguments in call order.
         * @return array Two elements: the arguments that precede the first
         *               'GMT' string (all of them when there is none), and a
         *               bool telling whether 'GMT' was present.
         */
        private static function splitGmtFlag(array $args)
        {
            foreach ($args as $index => $value) {
                if (is_string($value) && strcasecmp($value, 'GMT') === 0) {
                    return array(array_slice($args, 0, $index), true);
                }
            }
            return array($args, false);
        }

        /**
         * Current time in the zone the PAC date/time helpers work in.
         *
         * @param bool $useGmt true for UTC; otherwise the 'date.timezone' ini
         *                     setting, falling back to Europe/London when unset.
         * @return DateTime
         */
        private static function pacNow($useGmt)
        {
            if ($useGmt) {
                $tzn = 'UTC';
            } elseif (!($tzn = ini_get('date.timezone'))) {
                $tzn = 'Europe/London';
            }
            return new DateTime('now', new DateTimeZone($tzn));
        }

        /**
         * Expand a dateRange() argument list (already stripped of 'GMT') so
         * that slots 0-2 hold day, month, year of the first date and slots 3-5
         * those of the second, inserting 0 / '' for parts that were omitted.
         *
         * A value above 31 is taken to be a year, any other non-zero number a
         * day, and a string a month name.
         *
         * @param array $arg Argument list, modified in place; needs at least 7 slots.
         * @return bool false when the list cannot be interpreted.
         */
        private static function rationaliseDateArgs(array &$arg)
        {
            for ($ct = 0; $ct < 6; $ct += 3) {
                $head = $arg[$ct];

                if ((int) $head > 31) {
                    // year only: insert blank day and month in front of it
                    array_splice($arg, $ct, 0, array(0, ''));
                } elseif ((int) $head) {
                    // a day ...
                    if ((int) $arg[$ct + 1] || $arg[$ct + 1] === NULL) {
                        // ... followed by another number (start of the second
                        // date) or by nothing: insert blank month and year
                        array_splice($arg, $ct + 1, 0, array('', 0));
                    } elseif ((int) $arg[$ct + 2] && (int) $arg[$ct + 2] <= 31) {
                        // ... and a month, followed by a day: insert blank year
                        array_splice($arg, $ct + 2, 0, array(0));
                    }
                } elseif (is_string($head)) {
                    // a month: insert a blank day in front of it
                    array_splice($arg, $ct, 0, array(0));
                    if ((int) $arg[$ct + 2] <= 31) {
                        // month not followed by a year: insert a blank year
                        array_splice($arg, $ct + 2, 0, array(0));
                    }
                } else {
                    // nothing here: fine for the second date, an error for the first
                    return $ct > 0 && $head === NULL;
                }
            }
            return true;
        }
    }
    ?>

    Viewing all articles
    Browse latest Browse all 7

    Trending Articles