PHP cURL 模拟登录论坛并采集数据实例
要模拟浏览器访问网站,首先要学会观察浏览器是如何发送 HTTP 报文的,以及网站服务器返回给浏览器的是什么样的内容。我推荐安装一个国外开发的抓包软件 HttpWatch,最好使用功能完整的版本,否则有些功能是使用不了的。这个软件安装完成之后是嵌入在 IE 里的,启动 Record,在地址栏输入网址后回车,它就会将浏览器和服务器之间的所有通讯记录下来,让你一览无遗。关于这个软件的使用,本文不做介绍。
模拟浏览器登录的应用开发,最关键的地方是突破登录验证。cURL 不只支持 HTTP,还支持 HTTPS,区别就在于多了一层 SSL 加密传输;如果要登录 HTTPS 网站,记得让 PHP 启用 OpenSSL 扩展。还是先拿一个例子来分析,代码如下:
<?php
/**
 * Log in to a Discuz! forum with cURL and publish a new thread.
 *
 * Steps:
 *   1. fetch the login form and read its hidden "formhash" token;
 *   2. post the credentials together with that token;
 *   3. fetch the new-thread form and read the formhash it carries;
 *   4. post the thread and print the server's response.
 *
 * Every request shares one cookie file, which is removed when the script
 * ends (including when it stops early through die()).
 */

/**
 * Perform one HTTP request with cURL and return the response body.
 *
 * Stops the script with die() when the transfer fails.
 *
 * @param string      $url         Address to request.
 * @param string      $cookie_file Cookie file loaded before and saved after the request.
 * @param array|null  $post_fields Form fields; when given, the request is sent as a POST.
 * @param string|null $referer     Value for the Referer header, if any.
 * @return string Response body (headers are not included).
 */
function discuz_fetch($url, $cookie_file, $post_fields = null, $referer = null)
{
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Read and write the same file so cookies set by any step reach the next one.
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);

    if ($referer !== null) {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }

    if ($post_fields !== null) {
        curl_setopt($ch, CURLOPT_POST, 1);
        // A string body is sent as application/x-www-form-urlencoded, like a
        // browser form; passing the array itself would send multipart/form-data.
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post_fields));
    }

    $body  = curl_exec($ch);
    $error = curl_error($ch);
    curl_close($ch); // the cookie jar is written when the handle is closed

    if ($body === false) {
        die('cURL request failed: ' . $error);
    }

    return $body;
}

/**
 * Extract the hidden "formhash" field value from a Discuz! page.
 *
 * Accepts the field with or without an id="formhash" attribute, which covers
 * both the login form and the new-thread form.  Stops the script with die()
 * when the field is not present.
 *
 * @param string $html Page markup.
 * @return string The formhash value.
 */
function discuz_formhash($html)
{
    // "~" is the delimiter so the "/" in "/>" needs no escaping.
    $pattern = '~<input\s+type="hidden"\s+name="formhash"(?:\s+id="formhash")?\s+value="(.*?)"\s*/>~i';

    if (!preg_match($pattern, $html, $matches)) {
        die('Not found the forumhash.');
    }

    return $matches[1];
}

$discuz_url = 'http://127.0.0.1/discuz/'; // forum base URL
$login_url  = $discuz_url . 'logging.php?action=login';
$send_url   = $discuz_url . 'post.php?action=newthread&fid=2';

$cookie_file = tempnam('./temp', 'cookie');
if ($cookie_file === false) {
    die('Cannot create the cookie file.');
}
// Shutdown functions also run after die(), so the file never outlives the script.
register_shutdown_function('unlink', $cookie_file);

// Step 1: load the login form to obtain its formhash.
$contents = discuz_fetch($login_url, $cookie_file);
$formhash = discuz_formhash($contents);

// Step 2: submit the credentials together with the token just read.
$post_fields = array();
$post_fields['loginfield']    = 'username';
$post_fields['loginsubmit']   = 'true';
$post_fields['username']      = 'tianxin';
$post_fields['password']      = '111111';
$post_fields['questionid']    = 0;
$post_fields['answer']        = '';
$post_fields['seccodeverify'] = '';
$post_fields['formhash']      = $formhash;

discuz_fetch($login_url, $cookie_file, $post_fields);

// Step 3: load the new-thread form; it carries its own formhash.
$contents = discuz_fetch($send_url, $cookie_file);
$formhash = discuz_formhash($contents);

// Step 4: publish the thread and print whatever the forum answers.
$post_data = array();
$post_data['subject']     = 'test2';
$post_data['message']     = 'test2';
$post_data['topicsubmit'] = "yes";
$post_data['extra']       = '';
$post_data['tags']        = 'test';
$post_data['formhash']    = $formhash;

echo discuz_fetch($send_url, $cookie_file, $post_data, $send_url);
?>
cURL 实现网站模拟登录,代码如下:
<?php
/**
 * Log in to a forum with cURL, then fetch and print the forum index using
 * the cookies obtained by the login request.
 */

// cURL needs an absolute URL; set this to the scheme and host of the target site.
$site_url  = 'http://127.0.0.1';
$login_url = $site_url . '/bbs/logging.php?action=login&loginsubmit=yes';
$url       = $site_url . '/bbs';

$cookie_file = tempnam('./temp', 'cookie');
if ($cookie_file === false) {
    die('Cannot create the cookie file.');
}

// http_build_query() URL-encodes the values, including the non-ASCII ones.
// The username and password values are placeholders to be replaced.
// NOTE(review): the formhash is a fixed sample value; presumably it has to be
// replaced with the token from the live login form - verify against the site.
$post_fields = http_build_query(array(
    'username'    => '用户名',
    'password'    => '用户密码',
    'referer'     => 'index.php',
    'formhash'    => '24eca8af',
    'loginfield'  => 'username',
    'questionid'  => 0,
    'loginsubmit' => '登录',
));

// Request 1: submit the login form and save the returned cookies.
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
$login_result = curl_exec($ch);
$error        = curl_error($ch);
curl_close($ch); // the cookie jar is written when the handle is closed

if ($login_result === false) {
    unlink($cookie_file);
    die('Login request failed: ' . $error);
}

// Request 2: fetch the forum index with the saved cookies.
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
$contents = curl_exec($ch);
$error    = curl_error($ch);
curl_close($ch);

// The cookies are no longer needed once the second request has finished.
unlink($cookie_file);

if ($contents === false) {
    die('Index request failed: ' . $error);
}

echo $contents;
?>
|