Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow each request to specify additional HTTP headers #18

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 46 additions & 47 deletions parallelcurl.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
//
// The callback should take four arguments. The first is a string containing the content found at
// the URL. The second is the original URL requested, the third is the curl handle of the request that
// can be queried to get the results, and the fourth is the arbitrary 'cookie' value that you
// associated with this object. This cookie contains user-defined data.
//
// By Pete Warden <[email protected]>, freely reusable, see http://petewarden.typepad.com for more
Expand All @@ -37,29 +37,27 @@ class ParallelCurl {

public $max_requests;
public $options;

public $outstanding_requests;
public $multi_handle;

// Sets up the curl multi handle and starts with an empty table of outstanding requests.
//   $in_max_requests - stored as max_requests; startRequest() only waits for a free slot
//                      when this is greater than zero
//   $in_options      - stored as options and applied to every request with curl_setopt_array()
public function __construct($in_max_requests = 10, $in_options = array()) {
    $this->multi_handle = curl_multi_init();
    $this->outstanding_requests = array();

    $this->options = $in_options;
    $this->max_requests = $in_max_requests;
}
// Ensure all the requests finish nicely: when the object is destroyed, hand over to
// finishAllRequests() once so that nothing is abandoned mid-transfer.
public function __destruct() {
    $this->finishAllRequests();
}

// Sets how many requests can be outstanding at once before we block and wait for one to
// finish before starting the next one. startRequest() only applies the limit when the
// value is greater than zero.
public function setMaxRequests($in_max_requests) {
$this->max_requests = $in_max_requests;
}

// Sets the options to pass to curl, using the format of curl_setopt_array()
public function setOptions($in_options) {

Expand All @@ -69,34 +67,38 @@ public function setOptions($in_options) {
// Start a fetch from the $url address. When the request completes, $callback is invoked with
// four arguments: the content that was fetched, the URL, the curl handle and $user_data, eg
// on_request_done($content, $url, $ch, $user_data);
//
// If $post_fields is given, the request is sent as a POST with those fields. If $headers is
// given, it is passed to CURLOPT_HTTPHEADER for this request only (an array of header lines,
// eg array('Accept: application/json')).
//
// When max_requests is greater than zero this call first waits until fewer than max_requests
// requests are outstanding.
public function startRequest($url, $callback, $user_data = array(), $post_fields = null, $headers = null) {
    if ($this->max_requests > 0) {
        $this->waitForOutstandingRequestsToDropBelow($this->max_requests);
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    // Apply the shared options once, before the per-request settings, so that the URL,
    // POST fields and headers given for this request take precedence over the shared ones.
    curl_setopt_array($ch, $this->options);
    curl_setopt($ch, CURLOPT_URL, $url);

    if (isset($post_fields)) {
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
    }

    if (isset($headers)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    }

    curl_multi_add_handle($this->multi_handle, $ch);

    // NOTE(review): (int)$ch is only a unique key while curl handles are resources
    // (PHP < 8.0); PHP 8 returns CurlHandle objects - confirm the targeted PHP version.
    $ch_array_key = (int)$ch;

    $this->outstanding_requests[$ch_array_key] = array(
        'url' => $url,
        'callback' => $callback,
        'user_data' => $user_data,
    );

    $this->checkForCompletedRequests();
}

// You *MUST* call this function at the end of your script. It waits for any running requests
// to complete, and calls their callback functions
public function finishAllRequests() {
Expand All @@ -105,71 +107,68 @@ public function finishAllRequests() {

// Checks to see if any of the outstanding requests have finished, and runs the stored
// callback for each one that has.
private function checkForCompletedRequests() {
    // fix for https://bugs.php.net/bug.php?id=63411
    // This exec/select loop replaces the earlier approach of a zero-timeout
    // curl_multi_select() followed by a single round of curl_multi_exec().
    do {
        $mrc = curl_multi_exec($this->multi_handle, $active);
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);

    // Keep driving curl for as long as it reports active transfers and no error.
    while ($active && $mrc == CURLM_OK) {
        if (curl_multi_select($this->multi_handle) != -1) {
            do {
                $mrc = curl_multi_exec($this->multi_handle, $active);
            } while ($mrc == CURLM_CALL_MULTI_PERFORM);
        } else {
            // select failed: give up for now without reading any results; the next call
            // to this method starts over
            return;
        }
    }

    // Now grab the information about the completed requests
    while ($info = curl_multi_info_read($this->multi_handle)) {
        $ch = $info['handle'];

        // Must be derived exactly as in startRequest(), which stored the request under
        // this key.
        // NOTE(review): (int)$ch assumes curl handles are resources (PHP < 8.0) - confirm.
        $ch_array_key = (int)$ch;

        if (!isset($this->outstanding_requests[$ch_array_key])) {
            die("Error - handle wasn't found in requests: '$ch' in ".
                print_r($this->outstanding_requests, true));
        }

        $request = $this->outstanding_requests[$ch_array_key];

        $url = $request['url'];
        $content = curl_multi_getcontent($ch);
        $callback = $request['callback'];
        $user_data = $request['user_data'];

        // The callback runs while the request is still registered and the handle is still
        // attached to the multi handle, so it can query $ch with curl_getinfo().
        call_user_func($callback, $content, $url, $ch, $user_data);

        unset($this->outstanding_requests[$ch_array_key]);

        curl_multi_remove_handle($this->multi_handle, $ch);
    }
}

// Blocks until there are fewer than $max requests outstanding. Completed requests are
// processed on every pass, with a 10ms sleep between passes.
private function waitForOutstandingRequestsToDropBelow($max) {
    while (true) {
        $this->checkForCompletedRequests();
        if (count($this->outstanding_requests) < $max) {
            break;
        }

        usleep(10000);
    }
}

}


?>
8 changes: 4 additions & 4 deletions test.php
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/php
<?php
//
//
// A test script for the ParallelCurl class
//
//
// This example fetches 100 different results from Google's search API, with no more
// than 10 outstanding at any time.
//
Expand All @@ -14,8 +14,8 @@

// This function gets called back for each request that completes
function on_request_done($content, $url, $ch, $search) {
$httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

$httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($httpcode !== 200) {
print "Fetch error $httpcode for '$url'\n";
return;
Expand Down