Skip to content

Commit

Permalink
fixing ipv6 lookups.
Browse files Browse the repository at this point in the history
  • Loading branch information
padams committed Jan 29, 2022
2 parents 67f08da + 085df7e commit 753ab96
Show file tree
Hide file tree
Showing 8 changed files with 358 additions and 154 deletions.
52 changes: 52 additions & 0 deletions conf/searchengines.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

/**
 * Registry of known search engines.
 *
 * Each entry is an array with two keys:
 *  - 'domain':      a domain fragment that is looked for in the referrer host.
 *  - 'query_param': the query-string parameter of the referrer URL that
 *                   carries the visitor's search term on that engine.
 *
 * A domain may be listed more than once in order to declare alternative
 * query parameters (see 'aol' and 'rambler').
 *
 * The closing PHP tag is intentionally omitted so that stray whitespace after
 * it can never be emitted as output when this file is included.
 */
return [
    ['domain' => 'google', 'query_param' => 'q'],
    ['domain' => 'yahoo', 'query_param' => 'p'],
    ['domain' => 'msn', 'query_param' => 'q'],
    ['domain' => 'bing', 'query_param' => 'q'],
    ['domain' => 'images.google', 'query_param' => 'q'],
    ['domain' => 'images.search.yahoo.com', 'query_param' => 'p'],
    ['domain' => 'aol', 'query_param' => 'query'],
    ['domain' => 'aol', 'query_param' => 'encquery'],
    ['domain' => 'aol', 'query_param' => 'q'],
    ['domain' => 'lycos', 'query_param' => 'query'],
    ['domain' => 'ask', 'query_param' => 'q'],
    ['domain' => 'altavista', 'query_param' => 'q'],
    ['domain' => 'netscape', 'query_param' => 'query'],
    ['domain' => 'cnn', 'query_param' => 'query'],
    ['domain' => 'about', 'query_param' => 'terms'],
    ['domain' => 'mamma', 'query_param' => 'q'],
    ['domain' => 'daum', 'query_param' => 'q'],
    ['domain' => 'eniro', 'query_param' => 'search_word'],
    ['domain' => 'naver', 'query_param' => 'query'],
    ['domain' => 'pchome', 'query_param' => 'q'],
    ['domain' => 'alltheweb', 'query_param' => 'q'],
    ['domain' => 'voila', 'query_param' => 'rdata'],
    ['domain' => 'virgilio', 'query_param' => 'qs'],
    ['domain' => 'live', 'query_param' => 'q'],
    ['domain' => 'baidu', 'query_param' => 'wd'],
    ['domain' => 'alice', 'query_param' => 'qs'],
    ['domain' => 'yandex', 'query_param' => 'text'],
    ['domain' => 'najdi', 'query_param' => 'q'],
    ['domain' => 'mama', 'query_param' => 'query'],
    ['domain' => 'seznam', 'query_param' => 'q'],
    ['domain' => 'search', 'query_param' => 'q'],
    ['domain' => 'wp', 'query_param' => 'szukaj'],
    ['domain' => 'onet', 'query_param' => 'qt'],
    ['domain' => 'szukacz', 'query_param' => 'q'],
    ['domain' => 'yam', 'query_param' => 'k'],
    ['domain' => 'kvasir', 'query_param' => 'q'],
    ['domain' => 'sesam', 'query_param' => 'q'],
    ['domain' => 'ozu', 'query_param' => 'q'],
    ['domain' => 'terra', 'query_param' => 'query'],
    ['domain' => 'mynet', 'query_param' => 'q'],
    ['domain' => 'ekolay', 'query_param' => 'q'],
    ['domain' => 'rambler', 'query_param' => 'query'],
    ['domain' => 'rambler', 'query_param' => 'words'],
    ['domain' => 'duckduckgo', 'query_param' => 'q'],
];
13 changes: 13 additions & 0 deletions conf/socialnetworks.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?php

/**
 * Registry of known social networks.
 *
 * Each entry is an array with a single 'domain' key holding a domain fragment
 * that is looked for in the referrer host in order to classify the visit as
 * coming from a social network.
 *
 * The closing PHP tag is intentionally omitted so that stray whitespace after
 * it can never be emitted as output when this file is included.
 */
return [
    ['domain' => 'facebook'],
    ['domain' => 'twitter'],
    ['domain' => 'pinterest'],
    ['domain' => 'instagram'],
    ['domain' => 'linkedin'],
    ['domain' => 't.co'],
];
233 changes: 231 additions & 2 deletions modules/base/classes/trackingEventHelpers.php
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,234 @@ static function derivePageUri( $page_uri, $event ) {

return $page_parse['path'] ;
}
}

/**
 * Derives the traffic medium of an event from its session referrer.
 *
 * A medium already set by the tracker always wins. Otherwise the host of the
 * 'session_referer' property is classified as 'organic-search' (known search
 * engine), 'social-network' (known social network) or plain 'referral'.
 *
 * @param  mixed  $medium  medium already present on the event, if any
 * @param  object $event   event object exposing get( $name )
 * @return mixed  the given medium when truthy, a derived medium string when a
 *                referrer exists, or null when there is no referrer (the
 *                property's configured default is presumably applied by the
 *                caller in that case - confirm against the caller)
 */
static function deriveMedium( $medium, $event ) {

    // respect what was already set by the tracker
    if ( $medium ) {

        return $medium;
    }

    $ref = $event->get( 'session_referer' );

    if ( ! $ref ) {

        return null;
    }

    // Ask for the host component only. This yields null when the URL has no
    // host and false when the URL is malformed, so no array offset is ever
    // read from a failed parse.
    $host = parse_url( $ref, PHP_URL_HOST );

    if ( ! is_string( $host ) || $host === '' ) {

        // a referrer without a usable host cannot be classified further
        return 'referral';
    }

    if ( self::isSearchEngine( $host ) ) {

        return 'organic-search';
    }

    if ( self::isSocialNetwork( $host ) ) {

        return 'social-network';
    }

    return 'referral';
}

/**
 * Parses a URL into its components and, when a query string is present,
 * adds a 'query_params' element holding the decoded query parameters.
 *
 * Parameter values are url-decoded; parameter names are kept as they appear
 * in the query string. A parameter without "=" gets an empty string value.
 *
 * @param  string $url  the URL to parse
 * @return array|false  the result of PHP's parse_url(), extended with
 *                      'query_params' (array of name => value) when the URL
 *                      has a query string; false when the URL is malformed
 */
public static function parse_url( $url ) {

    $url = parse_url( $url );

    if ( ! is_array( $url ) || ! isset( $url['query'] ) ) {

        return $url;
    }

    $params = array();

    // html_entity_decode() turns "&amp;" separators back into "&"
    foreach ( explode( '&', html_entity_decode( $url['query'] ) ) as $pair ) {

        // Split on the first "=" only so that values which themselves
        // contain "=" (e.g. base64 padding) are kept intact.
        $parts = explode( '=', $pair, 2 );

        // skip empty segments ("a=1&&b=2") and segments with no name ("=x")
        if ( $parts[0] === '' ) {

            continue;
        }

        $params[ $parts[0] ] = isset( $parts[1] ) ? urldecode( $parts[1] ) : '';
    }

    $url['query_params'] = $params;

    return $url;
}


/**
 * Derives the traffic source of an event from its session referrer.
 *
 * A source already set by the tracker always wins. Otherwise the host of the
 * 'session_referer' property, with any leading "www." removed, is used.
 *
 * @param  mixed  $source  source already present on the event, if any
 * @param  object $event   event object exposing get( $name )
 * @return mixed  the given source when truthy, the referrer host when one can
 *                be extracted, or null otherwise
 */
static function deriveSource( $source, $event ) {

    // respect what was already set by the tracker
    if ( $source ) {

        return $source;
    }

    $ref = $event->get( 'session_referer' );

    if ( ! $ref ) {

        return null;
    }

    // Ask for the host component only. This yields null when the URL has no
    // host and false when the URL is malformed, so no array offset is ever
    // read from a failed parse.
    $host = parse_url( $ref, PHP_URL_HOST );

    if ( ! is_string( $host ) || $host === '' ) {

        return null;
    }

    return self::stripWWWFromDomain( $host );
}

/**
 * Removes a leading "www." label from a domain.
 *
 * A domain written with a leading period (".www.example.com") keeps that
 * period: the result is ".example.com". Any other input is returned as is.
 *
 * @param  string $domain
 * @return string
 */
static function stripWWWFromDomain( $domain ) {

    // ".www.example.com" => ".example.com"
    if ( strncmp( $domain, '.www.', 5 ) === 0 ) {

        return '.' . substr( $domain, 5 );
    }

    // "www.example.com" => "example.com"
    if ( strncmp( $domain, 'www.', 4 ) === 0 ) {

        return substr( $domain, 4 );
    }

    return $domain;
}

/**
 * Tells whether a referrer host belongs to a known search engine.
 *
 * The configured engine domains are matched against whole labels of the host
 * (case-insensitively): 'google' matches "google.com" and "www.google.co.uk"
 * but not "notgoogle.com", and 'ask' no longer matches "basket.com".
 *
 * @param  string $host  host name of the referrer
 * @return bool   true when the host matches a registered search engine
 */
static function isSearchEngine( $host ) {

    if ( ! is_string( $host ) || $host === '' ) {

        return false;
    }

    $organicSearchEngines = self::getSearchEngineList();

    if ( ! is_array( $organicSearchEngines ) ) {

        return false;
    }

    // Wrap the host in periods so that a plain substring search can only
    // succeed on label boundaries.
    $haystack = '.' . strtolower( trim( $host, '.' ) ) . '.';

    foreach ( $organicSearchEngines as $engine ) {

        if ( empty( $engine['domain'] ) ) {

            continue;
        }

        $needle = '.' . strtolower( trim( $engine['domain'], '.' ) ) . '.';

        // strpos() returns offset 0 for a match at the very start of the
        // string, so the result must be compared against false explicitly.
        if ( strpos( $haystack, $needle ) !== false ) {

            owa_coreAPI::debug( 'Found search engine: '. $engine['domain'] );

            return true;
        }
    }

    return false;
}

/**
 * Extracts the search term from the session referrer of an event.
 *
 * A term already set by the tracker always wins. Otherwise, when the referrer
 * host matches a registered search engine and the referrer URL carries query
 * parameters, the value of that engine's query parameter is returned in
 * lower case. Every registry entry for the matching domain is tried, so
 * engines listed with several alternative parameters are fully supported.
 *
 * @param  mixed  $term   term already present on the event, if any
 * @param  object $event  event object exposing get( $name )
 * @return mixed  the given term when truthy; the extracted term;
 *                '(not provided)' when the referrer is a search engine whose
 *                query string carries none of its known parameters; null when
 *                no term can be derived
 */
static function extractSearchTerm( $term, $event ) {

    if ( $term ) {

        return $term;
    }

    $ref = $event->get( 'session_referer' );

    if ( ! $ref ) {

        return null;
    }

    $uri = self::parse_url( $ref );
    owa_coreAPI::debug( $uri );

    // Search engines may strip the query under https, and a malformed
    // referrer does not parse into an array at all.
    if ( ! is_array( $uri ) || empty( $uri['host'] ) || empty( $uri['query_params'] ) ) {

        return null;
    }

    $organicSearchEngines = self::getSearchEngineList();

    if ( ! is_array( $organicSearchEngines ) ) {

        return null;
    }

    // Wrap the host in periods so that a plain substring search can only
    // succeed on label boundaries ('ask' must not match "basket.com").
    $haystack = '.' . strtolower( trim( $uri['host'], '.' ) ) . '.';
    $is_search_engine = false;

    foreach ( $organicSearchEngines as $engine ) {

        if ( empty( $engine['domain'] ) || ! isset( $engine['query_param'] ) ) {

            continue;
        }

        $needle = '.' . strtolower( trim( $engine['domain'], '.' ) ) . '.';

        // strpos() returns offset 0 for a match at the very start of the
        // string, so the result must be compared against false explicitly.
        if ( strpos( $haystack, $needle ) === false ) {

            continue;
        }

        $is_search_engine = true;
        $query_param = $engine['query_param'];

        if ( isset( $uri['query_params'][$query_param] ) ) {

            $term = $uri['query_params'][$query_param];
            owa_coreAPI::debug( 'Found search term: ' . $term );

            // The value was already url-decoded by self::parse_url() ("+"
            // is a space by now); decoding it a second time would corrupt
            // a literal "+" or "%" typed by the visitor.
            return trim( strtolower( $term ) );
        }

        // keep looking: the same domain may be registered again with an
        // alternative query parameter
    }

    return $is_search_engine ? '(not provided)' : null;
}

/**
 * Tells whether a referrer host belongs to a known social network.
 *
 * The configured network domains are matched against whole labels of the
 * host (case-insensitively): 'twitter' matches "twitter.com" and
 * "mobile.twitter.com", and 't.co' matches "t.co" but not "microsoft.com".
 *
 * @param  string $host  host name of the referrer
 * @return bool   true when the host matches a registered social network
 */
static function isSocialNetwork( $host ) {

    if ( ! is_string( $host ) || $host === '' ) {

        return false;
    }

    $social_networks = self::getSocialNetworkList();

    if ( ! is_array( $social_networks ) ) {

        return false;
    }

    // Wrap the host in periods so that a plain substring search can only
    // succeed on label boundaries.
    $haystack = '.' . strtolower( trim( $host, '.' ) ) . '.';

    foreach ( $social_networks as $network ) {

        if ( empty( $network['domain'] ) ) {

            continue;
        }

        $needle = '.' . strtolower( trim( $network['domain'], '.' ) ) . '.';

        // strpos() returns offset 0 for a match at the very start of the
        // string, so the result must be compared against false explicitly.
        if ( strpos( $haystack, $needle ) !== false ) {

            // message built by concatenation, like the other debug calls
            // in this class
            owa_coreAPI::debug( 'Found social network: ' . $network['domain'] );

            return true;
        }
    }

    return false;
}

/**
 * Loads the search engine registry from the 'searchengines.php' conf file.
 *
 * @return mixed  whatever owa_coreAPI::loadConf() returns for that file; the
 *                callers in this class iterate over it as a list of arrays
 *                with 'domain' and 'query_param' keys
 */
static function getSearchEngineList() {

    // NOTE(review): the second argument looks like a registry/filter name
    // under which the list can be extended - confirm against loadConf().
    $registry = owa_coreAPI::loadConf( 'searchengines.php', 'tracking.search_engine_registry' );

    return $registry;
}

/**
 * Loads the social network registry from the 'socialnetworks.php' conf file.
 *
 * @return mixed  whatever owa_coreAPI::loadConf() returns for that file; the
 *                caller in this class iterates over it as a list of arrays
 *                with a 'domain' key
 */
static function getSocialNetworkList() {

    // NOTE(review): the second argument looks like a registry/filter name
    // under which the list can be extended - confirm against loadConf().
    $registry = owa_coreAPI::loadConf( 'socialnetworks.php', 'tracking.social_network_registry' );

    return $registry;
}

/**
Expand Down Expand Up @@ -591,11 +819,11 @@ static function utfEncodeProperty( $string, $event ) {
*/
static function resolveFullHost( $full_host, $event ) {

// See if host is already resolved
if (
( $event->get('REMOTE_HOST') === '(not set)' || $event->get('REMOTE_HOST') === 'localhost' )
&& $event->get( 'ip_address' )
&& owa_coreAPI::getSetting('base', 'resolve_hosts')

) {

$remote_host = '';
Expand All @@ -622,7 +850,7 @@ static function resolveFullHost( $full_host, $event ) {
$remote_host = @gethostbyaddr( $ip_address );
}
}

// if we get a host back that is not an ip address or unknown
if ( $remote_host && $remote_host != $ip_address && $remote_host != 'unknown' ) {

Expand Down Expand Up @@ -699,6 +927,7 @@ static function resolveOs ( $os, $event ) {
}

/**
 * Flags the event as an entry page when it starts a new session.
 *
 * @param  mixed  $is_entry_page  ignored; the value is always recomputed
 * @param  object $event          event object exposing get( $name )
 * @return bool   true when the event's 'is_new_session' property is truthy
 */
static function resolveEntryPage( $is_entry_page, $event ) {

    $is_new_session = $event->get( 'is_new_session' );

    return (bool) $is_new_session;
}

Expand Down
28 changes: 15 additions & 13 deletions modules/base/module.php
Original file line number Diff line number Diff line change
Expand Up @@ -225,30 +225,32 @@ public function setupTrackingProperties() {
'data_type' => 'url',
'callbacks' => array( 'owa_trackingEventHelpers::makeUrlCanonical' )
),


'session_referer' => array(
'required' => false,
'data_type' => 'url',
'callbacks' => array()
),
// must come after session_referer
'source' => array(
'required' => true,
'data_type' => 'string',
'callbacks' => array( 'owa_trackingEventHelpers::lowercaseString' ),
'callbacks' => array( 'owa_trackingEventHelpers::lowercaseString', 'owa_trackingEventHelpers::deriveSource' ),
'default_value' => '(not set)'
),

// must come after session_referer
'medium' => array(
'required' => true,
'data_type' => 'string',
'callbacks' => array( 'owa_trackingEventHelpers::lowercaseString' ),
'default_value' => '(not set)'
),

'session_referer' => array(
'required' => false,
'data_type' => 'url',
'callbacks' => array()
'callbacks' => array( 'owa_trackingEventHelpers::lowercaseString', 'owa_trackingEventHelpers::deriveMedium' ),
'default_value' => 'direct'
),

// must come after session_referer
// @todo investigate if this should be a required property so that a proper join can occur.
'search_terms' => array(
'required' => false,
'callbacks' => array( 'owa_trackingEventHelpers::setSearchTerms' ),
'required' => true,
'callbacks' => array( 'owa_trackingEventHelpers::extractSearchTerm' ),
'default_value' => '(not set)'

),
Expand Down
Loading

0 comments on commit 753ab96

Please sign in to comment.