2016-10-03 1 views
1

Python 2.7 で Beautiful Soup を使って、米国国勢調査局のウェブサイトからデータを取得しようとしています。使用しているコードは次のとおりです(発生するエラー:BeautifulSoup 使用時の IOError: [Errno socket error]):

import urllib 
from bs4 import BeautifulSoup 

# Python 2.7 script: download the Census QuickFacts page and parse it with BeautifulSoup.
url = "https://www.census.gov/quickfacts/table/PST045215/01" 
# This is the call the traceback below points at: the HTTPS handshake fails
# with IOError [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] before any data is read.
html = urllib.urlopen(url).read() 
# Not reached when the request above fails.
soup = BeautifulSoup(html) 

しかし、次のエラーが発生します:

IOError         Traceback (most recent call last) 
<ipython-input-5-47941f5ea96a> in <module>() 
    59 
    60 url = "https://www.census.gov/quickfacts/table/PST045215/01" 
---> 61 html = urllib.urlopen(url).read() 
    62 soup = BeautifulSoup(html) 
    63 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.pyc in urlopen(url, data, proxies, context) 
    85   opener = _urlopener 
    86  if data is None: 
---> 87   return opener.open(url) 
    88  else: 
    89   return opener.open(url, data) 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.pyc in open(self, fullurl, data) 
    211   try: 
    212    if data is None: 
--> 213     return getattr(self, name)(url) 
    214    else: 
    215     return getattr(self, name)(url, data) 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.pyc in open_https(self, url, data) 
    441    if realhost: h.putheader('Host', realhost) 
    442    for args in self.addheaders: h.putheader(*args) 
--> 443    h.endheaders(data) 
    444    errcode, errmsg, headers = h.getreply() 
    445    fp = h.getfile() 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.pyc in endheaders(self, message_body) 
    1051   else: 
    1052    raise CannotSendHeader() 
-> 1053   self._send_output(message_body) 
    1054 
    1055  def request(self, method, url, body=None, headers={}): 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.pyc in _send_output(self, message_body) 
    895    msg += message_body 
    896    message_body = None 
--> 897   self.send(msg) 
    898   if message_body is not None: 
    899    #message_body was not a string (i.e. it is a file) and 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.pyc in send(self, data) 
    857   if self.sock is None: 
    858    if self.auto_open: 
--> 859     self.connect() 
    860    else: 
    861     raise NotConnected() 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.pyc in connect(self) 
    1276 
    1277    self.sock = self._context.wrap_socket(self.sock, 
-> 1278             server_hostname=server_hostname) 
    1279 
    1280  __all__.append("HTTPSConnection") 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ssl.pyc in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname) 
    351       suppress_ragged_eofs=suppress_ragged_eofs, 
    352       server_hostname=server_hostname, 
--> 353       _context=self) 
    354 
    355  def set_npn_protocols(self, npn_protocols): 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ssl.pyc in __init__(self, sock, keyfile, certfile, server_side, cert_reqs, ssl_version, ca_certs, do_handshake_on_connect, family, type, proto, fileno, suppress_ragged_eofs, npn_protocols, ciphers, server_hostname, _context) 
    599       # non-blocking 
    600       raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets") 
--> 601      self.do_handshake() 
    602 
    603    except (OSError, ValueError): 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ssl.pyc in do_handshake(self, block) 
    828    if timeout == 0.0 and block: 
    829     self.settimeout(None) 
--> 830    self._sslobj.do_handshake() 
    831   finally: 
    832    self.settimeout(timeout) 

IOError: [Errno socket error] [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:590) 

Stack Overflow の2つの投稿(this と this)にある解決策も確認しましたが、どちらも問題を解決できませんでした。この問題への対処法があれば教えてください。

答えて

2

1つの回避策は、次のように requests に切り替えることです:

import requests 
from bs4 import BeautifulSoup 

url = "https://www.census.gov/quickfacts/table/PST045215/01" 
# Bound the wait: without a timeout, requests can block indefinitely on an
# unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page as data.
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(response.content, "html.parser")
print(soup.title.get_text())

出力:

Alabama QuickFacts from the US Census Bureau 

注:これには requests[security] パッケージもインストールしておく必要があります:

pip install requests[security] 
+0

こんにちは、Alex さん。はい、確かに pip install requests[security] を実行した後は、あなたの方法でうまく動作しました。どうもありがとうございました。 – Fxs7576

関連する問題