2012-03-17 14 views
2

apache HttpClient apiを使用して、Webサイト(rfp.ca)にログインし、htmlコンテンツを取得したい場合は、自分のコードを使用します。私はそれを実行した後は、それが唯一のログインページのソースコードを返さapache HttpClient、フォームベースのログイン、およびHTMLコンテンツの取得

http://www.rfp.ca/login/ まだ資格情報を要求し、私はそれがブラウザを使用してちょうどログインようhttp://www.rfp.ca/my_account/にリダイレクトされるように期待していました。

これを実装する方法に関するアイデアはありますか?

ありがとうございます。ここ

エリック

import java.io.BufferedReader; 
import java.io.InputStream; 
import java.io.InputStreamReader; 
import java.util.ArrayList; 
import java.util.List; 
import org.apache.http.HttpEntity; 
import org.apache.http.HttpResponse; 
import org.apache.http.NameValuePair; 
import org.apache.http.client.entity.UrlEncodedFormEntity; 
import org.apache.http.client.methods.HttpGet; 
import org.apache.http.client.methods.HttpPost; 
import org.apache.http.cookie.Cookie; 
import org.apache.http.impl.client.DefaultHttpClient; 
import org.apache.http.message.BasicNameValuePair; 
import org.apache.http.protocol.HTTP; 



public class HttpGetter { 



     public static void main(String[] args) throws Exception { 

      DefaultHttpClient httpclient = new DefaultHttpClient(); 

      HttpGet httpget = new HttpGet("http://www.rfp.ca/login/"); 

      HttpResponse response = httpclient.execute(httpget); 
      HttpEntity entity = response.getEntity(); 

      System.out.println("Login form get: " + response.getStatusLine()); 
      if (entity != null) { 
       entity.consumeContent(); 
      } 
      System.out.println("Initial set of cookies:"); 
      List<Cookie> cookies = httpclient.getCookieStore().getCookies(); 
      if (cookies.isEmpty()) { 
       System.out.println("None"); 
      } else { 
       for (int i = 0; i < cookies.size(); i++) { 
        System.out.println("- " + cookies.get(i).toString()); 
       } 
      } 

      HttpPost httpost = new HttpPost("http://www.rfp.ca/login/"); 

      List <NameValuePair> nvps = new ArrayList <NameValuePair>(); 
      nvps.add(new BasicNameValuePair("username", "myusername")); 
      nvps.add(new BasicNameValuePair("password", "mypassword")); 

      httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8)); 

      response = httpclient.execute(httpost); 

      System.out.println("Response "+response.toString()); 
      entity = response.getEntity(); 

      System.out.println("Login form get: " + response.getStatusLine()); 
      if (entity != null) { 

       InputStream is = entity.getContent(); 
       BufferedReader br = new BufferedReader(new InputStreamReader(is)); 
       String str =""; 
       while ((str = br.readLine()) != null){ 
        System.out.println(""+str); 
       } 
      } 

      System.out.println("Post logon cookies:"); 
      cookies = httpclient.getCookieStore().getCookies(); 
      if (cookies.isEmpty()) { 
       System.out.println("None"); 
      } else { 
       for (int i = 0; i < cookies.size(); i++) { 
        System.out.println("- " + cookies.get(i).toString()); 
       } 
      } 
      httpclient.getConnectionManager().shutdown();   

    } 
} 

は、返された結果である:

Login form get: HTTP/1.1 200 OK 
Initial set of cookies: 
- [version: 0][name: PHPSESSID][value: f4dc36acc705b31b15b4ea07a398a60b][domain: www.rfp.ca][path: /][expiry: null] 
Response HTTP/1.1 200 OK [Date: Sat, 17 Mar 2012 04:04:49 GMT, Server: Apache/2.2.22 (Unix) mod_ssl/2.2.22 OpenSSL/1.0.0-fips mod_auth_passthrough/2.1 mod_bwlimited/1.4 FrontPage/5.0.2.2635, X-Powered-By: PHP/5.2.17, Expires: Thu, 19 Nov 1981 08:52:00 GMT, Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0, Pragma: no-cache, Vary: Accept-Encoding,User-Agent, Connection: close, Transfer-Encoding: chunked, Content-Type: text/html;charset=utf-8] 
Login form get: HTTP/1.1 200 OK 
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 

    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 

<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US"> 

    <head> 

<meta name="keywords" content="" /> 

<meta http-equiv="Content-Type" content="text/html charset=utf-8"/>  

<title>RFP.ca: User login form</title> 

<link rel="StyleSheet" type="text/css" href="http://www.rfp.ca/templates/_system/main/images/css/form.css" /> 

<link rel="StyleSheet" type="text/css" href="http://www.rfp.ca/templates/rfp/main/images/design.css" /> 


<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> 

<link rel="icon" href="/favicon.ico" type="image/x-icon"> 

<link rel="stylesheet" href="http://www.rfp.ca/system/lib/rating/style.css" type="text/css" /> 

<link rel="StyleSheet" type="text/css" href="http://www.rfp.ca/system/ext/jquery/css/jquery-ui.css" /> 

<script language="JavaScript" type="text/javascript" src="http://www.rfp.ca/system/ext/jquery/jquery.js"></script> 

<script language="JavaScript" type="text/javascript" src="http://www.rfp.ca/system/ext/jquery/jquery-ui.js"></script> 

<script language="JavaScript" type="text/javascript" src="http://www.rfp.ca/system/ext/jquery/jquery.validate.min.js"></script> 






<style type="text/css"> 

*html img, 

*html.png 

{ 

    azimuth: expression(

    this.pngSet? 

     this.pngSet=true : 

     (this.nodeName == "IMG" ? 

      (this.src.toLowerCase().indexOf('.png')>-1 ? 

      (this.runtimeStyle.backgroundImage = "none", this.runtimeStyle.filter = "progid:DXImageTransform.Microsoft.AlphaImageLoader(src='" + this.src + "', sizingMethod='image')", 

       this.src = "http://www.rfp.ca/templates/rfp/main/images/blank.gif") : 

      '') :   

      (this.currentStyle.backgroundImage.toLowerCase().indexOf('.png')>-1) ? 

      (this.origBg = (this.origBg) ? 

       this.origBg :    

       this.currentStyle.backgroundImage.toString().replace('url("','').replace('")',''), 

       this.runtimeStyle.filter = "progid:DXImageTransform.Microsoft.AlphaImageLoader(src='" + this.origBg + "', sizingMethod='crop')", 

       this.runtimeStyle.backgroundImage = "none") : 

      '' 

     ), this.pngSet=true 

); 

} 

</style> 



<script type="text/javascript"> 

    var _gaq = _gaq || []; 
    _gaq.push(['_setAccount', 'UA-254707-12']); 
    _gaq.push(['_trackPageview']); 

    (function() { 
    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; 
    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; 
    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); 
    })(); 

</script> 

    </head> 

<body> 

<div id="messageBox"></div> 

<div class="MainDiv"> 
    <div class="headerPage"> 
     <div class="logo"> 
      <div class="png"></div> 
      <a href="http://www.rfp.ca/"><img src="http://www.rfp.ca/templates/rfp/main/images/logo.png" border="0" alt="" title="" /></a> 
     </div> 
     <div class="userMenu"> 
          <a href="http://www.rfp.ca/" title="RFP Home"> Home</a> &nbsp; &nbsp; <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt="" /> &nbsp; &nbsp; 
       <a href="http://www.rfp.ca/find_rfps/" title="Search">Search</a> &nbsp; &nbsp; <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt="" /> &nbsp; &nbsp; 
       <a href="http://www.rfp.ca/rfp_alerts/?action=new" title="E-mail Alert">E-mail Alert</a> &nbsp; &nbsp; <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt="" /> &nbsp; &nbsp; 
       <a href="http://www.rfp.ca/contact/" title="Contact">Contact</a> &nbsp; &nbsp; <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt="" /> &nbsp; &nbsp; 
       <a href="http://www.rfp.ca/login/" title="Sign In"> Sign In</a> 
         <br/><br/> 
<!--    
      <form id="langSwitcherForm" method="get" action=""> 
       <select name="lang" onchange="location.href='http://www.rfp.ca/login/?lang='+this.value+'&amp;'" style="width: 200px;"> 
              <option value="de">Deutsch</option> 
              <option value="tr">Türkçe</option> 
              <option value="ps">پښتو</option> 
              <option value="fr">Français</option> 
              <option value="ar">العربية</option> 
              <option value="en" selected="selected">English</option> 
              <option value="fa">فارسی</option> 
              <option value="ja">日本語</option> 
              <option value="es">Español</option> 
              <option value="nl">Nederlands</option> 
              <option value="ru">–†—É—Å—Å–∫–∏–π</option> 
              <option value="pt">Português</option> 
            </select> 
      </form> 
--> 

     </div> 
    </div> 
    <div class="clr"><br /></div> 

<div class="indexDiv" > 







    <h1>Sign In</h1> 


    <p style="color:#9B9B9B"><i>Tip: Username is your e-mail address</i></p> 

    <form action="http://www.rfp.ca/login/" method="post" id="loginForm" > 

     <input type="hidden" name="return_url" value="" /> 

     <input type="hidden" name="action" value="login" /> 


     <fieldset> 

      <div class="inputName">Username</div> 

      <div class="inputField"><input type="text" class="logInNameInput" name="username" /></div> 

     </fieldset> 

     <fieldset> 

      <div class="inputName">Password</div> 

      <div class="inputField"><input class="logInPassInput2" type="password" name="password" /></div> 

     </fieldset> 

     <fieldset> 

      <div class="inputName">&nbsp;</div> 

      <div class="inputField"><input type="checkbox" name="keep" /> Keep me signed in</div> 

     </fieldset> 

     <fieldset> 

      <div class="inputName">&nbsp;</div> 

      <div class="inputField"><input type="submit" value="Login" class="button" /></div> 

     </fieldset> 

    </form> 

    <br/> 

    <a href="http://www.rfp.ca/password_recovery/">Forgot Your Password?</a>&nbsp;|&nbsp; <a href="http://www.rfp.ca/registration/">Subscription</a> 



</div> 

<div id="grayBgBanner"></div> 

    <div class="clr"><br /></div> 
    <div class="bottomMenu"> 
     <a href="http://www.rfp.ca/">Home</a> <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt=""> 
     <a href="http://www.rfp.ca/faq/"> About Us/FAQ</a> <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt=""> 
     <a href="http://www.rfp.ca/features/"> Features</a> <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt=""> 
     <a href="http://www.rfp.ca/contact/" >Contact</a> <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt=""> 
     <a href="http://www.rfp.ca/privacy_policy/">Privacy Policy</a> <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt="">   
     <a href="http://www.rfp.ca/terms_of_use/">Terms of use</a> <img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt=""> 
     &nbsp;&copy; 2012 Organized Media &nbsp;<img src="http://www.rfp.ca/templates/rfp/main/images/sepDot.png" border="0" alt="">  
     <a href="http://www.twitter.com/rfpca" rel="me" target="_blank"><img src="http://www.rfp.ca/templates/rfp/main/images/twitter.gif" border="0" alt="Twitter"></a><a href="http://www.facebook.com/pages/RFPca/164233376967738" rel="me" target="_blank"><img src="http://www.rfp.ca/templates/rfp/main/images/facebook.gif" border="0" alt="Facebook"></a>  
     </div> 
</div> 
<div class="Footer"> 
</div> 
</body> 

</html> 

Post logon cookies: 
- [version: 0][name: PHPSESSID][value: f4dc36acc705b31b15b4ea07a398a60b][domain: www.rfp.ca][path: /][expiry: null] 

私はこれら2つのパラメータ追加してみました:

nvps.add(new BasicNameValuePair("return_url", "http://www.rfp.ca/my_account/")); 
nvps.add(new BasicNameValuePair("action", "login")); 

をし、それはいくつかのエラーが判明:

Login form get: HTTP/1.1 200 OK 
Initial set of cookies: 
- [version: 0][name: PHPSESSID][value: e76f3b507a3db64cf1d4ad2297fb0c58][domain: www.rfp.ca][path: /][expiry: null] 
Exception in thread "main" org.apache.http.client.ClientProtocolException 
    at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:822) 
    at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:754) 
    at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:732) 
    at Crawler.HttpGetter.main(HttpGetter.java:203) 
Caused by: org.apache.http.ProtocolException: Invalid redirect URI: ?Ûiÿü0·éq¯æɧ¢éí 
    at org.apache.http.impl.client.DefaultRedirectStrategy.createLocationURI(DefaultRedirectStrategy.java:185) 
    at org.apache.http.impl.client.DefaultRedirectStrategy.getLocationURI(DefaultRedirectStrategy.java:116) 
    at org.apache.http.impl.client.DefaultRedirectStrategy.getRedirect(DefaultRedirectStrategy.java:193) 
    at org.apache.http.impl.client.DefaultRequestDirector.handleResponse(DefaultRequestDirector.java:1035) 
    at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:492) 
    at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:820) 
    ... 3 more 
Caused by: java.net.URISyntaxException: Illegal character in path at index 0: ?Ûiÿü0·éq¯æɧ¢éí 
    at java.net.URI$Parser.fail(URI.java:2809) 
    at java.net.URI$Parser.checkChars(URI.java:2982) 
    at java.net.URI$Parser.parseHierarchical(URI.java:3066) 
    at java.net.URI$Parser.parse(URI.java:3024) 
    at java.net.URI.<init>(URI.java:578) 
    at org.apache.http.impl.client.DefaultRedirectStrategy.createLocationURI(DefaultRedirectStrategy.java:183) 
    ... 8 more 

答えて

4

私はこれについてはよく分からないが、私は形で見ることができるものから、それは次のパラメータ

return_url: 
action:login 
username:myusername 
password:mypassword 

を期待そして、あなたがPOSTリクエストの最初の2つを提供していません。

更新:このような場合に適切なパラメータを取得するには、ブラウザでURLを開き、FirebugまたはDeveloper Tools(WebKit)のネットワークアクティビティを監視するのが最適です。プログラムで送信する必要のある内容が正確に表示されます。

+0

この度はお返事いただきありがとうございます。この2つのパラメータを追加しました。nvps.add(新しいBasicNameValuePair( "return_url"、 "http://www.rfp.ca/my_account/")); \t nvps.add(新しいBasicNameValuePair( "action"、 "login")); nvps.add(新BasicNameValuePair(「return_url」、「http://www.rfp.ca/my_account/」:それは – EricSRK

+0

は、私が代わりに実際のURLを使用しての溶液、 を見つけた – EricSRK

+0

アップデートを参照してくださいいくつかのエラーを返しました)); パラメータ値を空白のままにしました。 nvps.add(new BasicNameValuePair( "return_url"、 "")); とWebサーバーだけがリダイレクトを処理します。 乾杯 Eric – EricSRK

関連する問題