2017-02-15 4 views
-3

このpythonプログラムを実行するとエラーが発生します 「/path/to/times-testing.log」にファイルやディレクトリがありません。 私は理解していないようだ、誰もこの問題を解決するのに役立つことができます。 ありがとうございます!Pythonスクリプトでエラーが発生しましたか?

相続コード:

import urllib2 
import json 
import datetime 
import time 
import sys, os 
import logging 
from urllib2 import HTTPError 
from ConfigParser import SafeConfigParser 


# helper function to iterate through dates 
def daterange(start_date, end_date): 
    if start_date <= end_date: 
     for n in range((end_date - start_date).days + 1): 
      yield start_date + datetime.timedelta(n) 
    else: 
     for n in range((start_date - end_date).days + 1): 
      yield start_date - datetime.timedelta(n) 

# helper function to get json into a form I can work with  
def convert(input): 
    if isinstance(input, dict): 
     return {convert(key): convert(value) for key, value in input.iteritems()} 
    elif isinstance(input, list): 
     return [convert(element) for element in input] 
    elif isinstance(input, unicode): 
     return input.encode('utf-8') 
    else: 
     return input 

# helpful function to figure out what to name individual JSON files   
def getJsonFileName(date, page, json_file_path): 
    json_file_name = ".".join([date,str(page),'json']) 
    json_file_name = "".join([json_file_path,json_file_name]) 
    return json_file_name 

# helpful function for processing keywords, mostly  
def getMultiples(items, key): 
    values_list = "" 
    if len(items) > 0: 
     num_keys = 0 
     for item in items: 
      if num_keys == 0: 
       values_list = item[key]     
      else: 
       values_list = "; ".join([values_list,item[key]]) 
      num_keys += 1 
    return values_list 

# get the articles from the NYTimes Article API  
def getArticles(date, query, api_key, json_file_path): 
    # LOOP THROUGH THE 101 PAGES NYTIMES ALLOWS FOR THAT DATE 
    for page in range(101): 
     for n in range(5): # 5 tries 
      try: 
       request_string = "http://api.nytimes.com/svc/search/v2/articlesearch.json?begin_date=" + date + "&end_date=" + date + "&page=" + str(page) + "&api-key=" + api_key 
       response = urllib2.urlopen(request_string) 
       content = response.read() 
       if content: 
        articles = convert(json.loads(content)) 
        # if there are articles here 
        if len(articles["response"]["docs"]) >= 1: 
         json_file_name = getJsonFileName(date, page, json_file_path) 
         json_file = open(json_file_name, 'w') 
         json_file.write(content) 
         json_file.close() 
        # if no more articles, go to next date 
        else: 
         return 
       time.sleep(3) # wait so we don't overwhelm the API 
      except HTTPError as e: 
       logging.error("HTTPError on page %s on %s (err no. %s: %s) Here's the URL of the call: %s", page, date, e.code, e.reason, request_string) 
       if e.code == 403: 
        print "Script hit a snag and got an HTTPError 403. Check your log file for more info." 
        return 
       if e.code == 429: 
        print "Waiting. You've probably reached an API limit." 
        time.sleep(30) # wait 30 seconds and try again 
      except: 
       logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0]) 
       continue 

# parse the JSON files you stored into a tab-delimited file 
def parseArticles(date, tsv_file_name, json_file_path): 

    for file_number in range(101): 
     # get the articles and put them into a dictionary 
     try: 
      file_name = getJsonFileName(date,file_number, json_file_path) 
      if os.path.isfile(file_name): 
       in_file = open(file_name, 'r') 
       articles = convert(json.loads(in_file.read())) 
       in_file.close() 
      else: 
       break 
     except IOError as e: 
      logging.error("IOError in %s page %s: %s %s", date, file_number, e.errno, e.strerror) 
      continue 

     # if there are articles in that document, parse them 
     if len(articles["response"]["docs"]) >= 1: 

      # open the tsv for appending 
      try: 
       out_file = open(tsv_file_name, 'ab') 

      except IOError as e: 
       logging.error("IOError: %s %s %s %s", date, file_number, e.errno, e.strerror) 
       continue 

      # loop through the articles putting what we need in a tsv 
      try: 
       for article in articles["response"]["docs"]: 
        # if (article["source"] == "The New York Times" and article["document_type"] == "article"): 
        keywords = "" 
        keywords = getMultiples(article["keywords"],"value") 

        # should probably pull these if/else checks into a module 
        variables = [ 
         article["pub_date"], 
         keywords, 
         str(article["headline"]["main"]).decode("utf8").replace("\n","") if "main" in article["headline"].keys() else "", 
         str(article["source"]).decode("utf8") if "source" in article.keys() else "", 
         str(article["document_type"]).decode("utf8") if "document_type" in article.keys() else "", 
         article["web_url"] if "web_url" in article.keys() else "", 
         str(article["news_desk"]).decode("utf8") if "news_desk" in article.keys() else "", 
         str(article["section_name"]).decode("utf8") if "section_name" in article.keys() else "", 
         str(article["snippet"]).decode("utf8").replace("\n","") if "snippet" in article.keys() else "", 
         str(article["lead_paragraph"]).decode("utf8").replace("\n","") if "lead_paragraph" in article.keys() else "", 
         ] 
        line = "\t".join(variables) 
        out_file.write(line.encode("utf8")+"\n") 
      except KeyError as e: 
       logging.error("KeyError in %s page %s: %s %s", date, file_number, e.errno, e.strerror) 
       continue 
      except (KeyboardInterrupt, SystemExit): 
       raise 
      except: 
       logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0]) 
       continue 

      out_file.close() 
     else: 
      break 

# Main function where stuff gets done 

def main(): 

    config = SafeConfigParser() 
    script_dir = os.path.dirname(__file__) 
    config_file = os.path.join(script_dir, 'config/settings.cfg') 
    config.read(config_file) 

    json_file_path = config.get('files','json_folder') 
    tsv_file_name = config.get('files','tsv_file') 
    log_file = config.get('files','logfile') 

    api_key = config.get('nytimes','api_key')  
    start = datetime.date(year = int(config.get('nytimes','start_year')), month = int(config.get('nytimes','start_month')), day = int(config.get('nytimes','start_day'))) 
    end = datetime.date(year = int(config.get('nytimes','end_year')), month = int(config.get('nytimes','end_month')), day = int(config.get('nytimes','end_day'))) 
    query = config.get('nytimes','query') 

    logging.basicConfig(filename=log_file, level=logging.INFO) 

    logging.info("Getting started.") 
    try: 
     # LOOP THROUGH THE SPECIFIED DATES 
     for date in daterange(start, end): 
      date = date.strftime("%Y%m%d") 
      logging.info("Working on %s." % date) 
      getArticles(date, query, api_key, json_file_path) 
      parseArticles(date, tsv_file_name, json_file_path) 
    except: 
     logging.error("Unexpected error: %s", str(sys.exc_info()[0])) 
    finally: 
     logging.info("Finished.") 

if __name__ == '__main__' : 
    main() 

それをコンパイルするときには、次のエラーを生成します。

Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$ python getTimesArticles.py 
Traceback (most recent call last): 
    File "getTimesArticles.py", line 180, in <module> 
    main() 
    File "getTimesArticles.py", line 164, in main 
    logging.basicConfig(filename=log_file, level=logging.INFO) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 1545, in basicConfig 
    hdlr = FileHandler(filename, mode) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 911, in __init__ 
    StreamHandler.__init__(self, self._open()) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 941, in _open 
    stream = open(self.baseFilename, self.mode) 
IOError: [Errno 2] No such file or directory: '/path/to/times-testing.log' 
Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$ 
+0

ログファイルがありません 'touch /path/to/times-testing.log'ログファイルに触れます – mtt2p

+0

ファイルconfig/settings.cfgの内容は何ですか?特に、主要なログファイルに関連する値は何ですか?対応するファイルはディスク上に存在しますか? – Antwane

+0

はい、/desktop/nytpy/config/settings.cfcにsettings.cfcがあります –

答えて

0

あなたmain()機能:

​​

config/settings.cfgファイルを開き、名前をフェッチログファイルの内容は、/path/to/times-testing.log。そのフォルダを作成するか(おそらくベスト・アイデアではない)、正しいファイルを指すように構成する必要があります。

+0

ありがとうございます、あなたの答えは非常に洞察力があります、私はsettings.cfgファイルを探して、logfile = /path/to/times-testing.logを指定するパラメータがあります json_folder =/full/path/ tsv_file = full/path/output.tsv、私は新しい空のファイル、すなわちoutput.tsvとtimes-testing.logファイルを作成し、パスを追加すべきですか? Json_folderパラメータに何を設定する必要がありますか、ありがとうございます! –

+0

まあ、それはあなた次第で、それらのファイルをどこに置いてほしいのですか。 :)通常、ログはUNIXシステムの '/ var/log /'フォルダ(macOSなど)に保存されます。 – sxn

関連する問題