PHP的cURL库简介及使用示例
内容摘要
使用PHP的cURL库可以简单和有效地去抓网页。你只需要运行一个脚本,然后分析一下你所抓取的网页,然后就可以以程序的方式得到你想要的数据了。无论是你想从从一个链接上取部分
文章正文
使用PHP的cURL库可以简单和有效地去抓网页。你只需要运行一个脚本,然后分析一下你所抓取的网页,然后就可以以程序的方式得到你想要的数据了。无论是你想从从一个链接上取部分数据,或是取一个XML文件并把其导入数据库,那怕就是简单的获取网页内容,cURL 是一个功能强大的PHP库。
PHP中的CURL函数库(Client URL Library Function)
//@todo验证码
$post_fields['seccodeverify'] = '';
//获取表单FORMHASH
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$contents = curl_exec($ch);
curl_close($ch);
preg_match('/<input\s*type="hidden"\s*name="formhash"\s*value="(.*?)"\s*\/>/i', $contents, $matches);
if(!empty($matches)) {
$formhash = $matches[1];
} else {
die('Not found the forumhash.');
}
//POST数据,获取COOKIE
$cookie_file = dirname(__FILE__) . '/cookie.txt';
//$cookie_file = tempnam('/tmp');
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
curl_exec($ch);
curl_close($ch);
//带着上面得到的COOKIE获取需要登录后才能查看的页面内容
$ch = curl_init($get_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
$contents = curl_exec($ch);
curl_close($ch);
var_dump($contents);
?>
$post_fields['seccodeverify'] = '';
//获取表单FORMHASH
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$contents = curl_exec($ch);
curl_close($ch);
preg_match('/<input\s*type="hidden"\s*name="formhash"\s*value="(.*?)"\s*\/>/i', $contents, $matches);
if(!empty($matches)) {
$formhash = $matches[1];
} else {
die('Not found the forumhash.');
}
//POST数据,获取COOKIE
$cookie_file = dirname(__FILE__) . '/cookie.txt';
//$cookie_file = tempnam('/tmp');
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
curl_exec($ch);
curl_close($ch);
//带着上面得到的COOKIE获取需要登录后才能查看的页面内容
$ch = curl_init($get_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
$contents = curl_exec($ch);
curl_close($ch);
var_dump($contents);
?>
以上就是本文的全部内容了,希望大家能够喜欢。
代码注释