|
5 | 5 | import urllib |
6 | 6 | import re |
7 | 7 | import time |
| 8 | +import argparse |
| 9 | + |
| 10 | +parser = argparse.ArgumentParser() |
| 11 | +parser.add_argument('-f', '--fileName', help='the file of Borrow Direct data. optional - if not provided, the script will ask for input') |
| 12 | +args = parser.parse_args() |
| 13 | + |
| 14 | +if args.fileName: |
| 15 | + fileName = args.fileName |
| 16 | +else: |
| 17 | + fileName = raw_input('Enter the file of Borrow Direct data: ') |
8 | 18 |
|
9 | 19 | startTime = time.time() |
10 | 20 |
|
11 | | -fileName = raw_input('Enter file name: ') |
12 | 21 | fileNameWithoutExtension = fileName[:fileName.index('.')] |
13 | 22 |
|
14 | 23 | baseURL = 'http://www.worldcat.org/webservices/catalog/search/opensearch?q=' |
15 | 24 | baseURL2 = 'http://www.worldcat.org/webservices/catalog/content/' |
16 | 25 |
|
| 26 | +with open(fileName) as csvfile: |
| 27 | + reader = csv.DictReader(csvfile) |
| 28 | + rowCount = len(list(reader)) |
| 29 | + |
17 | 30 | wskey = secrets.wskey |
18 | 31 | f=csv.writer(open(fileNameWithoutExtension+'oclcSearchMatches.csv', 'wb')) |
19 | 32 | f.writerow(['searchOclcNum']+['borrower']+['lender']+['status']+['patronType']+['isbn']+['searchTitle']+['searchAuthor']+['searchDate']+['oclcNum']+['oclcTitle']+['oclcAuthor']+['oclcPublisher']+['callNumLetters']+['callNumFull']+['physDesc']+['oclcDate']) |
|
22 | 35 | with open(fileName) as csvfile: |
23 | 36 | reader = csv.DictReader(csvfile) |
24 | 37 | for row in reader: |
| 38 | + rowCount -= 1 |
| 39 | + print 'Items remaining: ', rowCount |
25 | 40 | borrower = row['BORROWER'] |
26 | 41 | lender = row['LENDER'] |
27 | 42 | status = row['STATUS'] |
|
34 | 49 | searchPublisher = row['PUBLISHER'] |
35 | 50 | searchDate = row['PUBLICATION YEAR'] |
36 | 51 | try: |
37 | | - response = requests.get('http://www.worldcat.org/webservices/catalog/content/'+searchOclcNum+'?format=rss&wskey='+wskey).content |
| 52 | + response = requests.get('http://www.worldcat.org/webservices/catalog/content/'+searchOclcNum+'?format=rss&wskey='+wskey) |
| 53 | + response = response.content |
38 | 54 | record = BeautifulSoup(response, "lxml").find('record') |
39 | 55 | oclcNum = record.find('controlfield', {'tag' : '001'}).text |
| 56 | + print 'search oclc #' |
40 | 57 | except: |
41 | 58 | originalTitle = searchTitle |
42 | 59 | search = urllib.quote(searchTitle) |
43 | | - print search |
44 | | - response = requests.get(baseURL+search.strip()+'&count=1&format=rss&wskey='+wskey).content |
| 60 | + response = requests.get(baseURL+search.strip()+'&count=1&format=rss&wskey='+wskey) |
| 61 | + print 'search title' |
| 62 | + response = response.content |
45 | 63 | record = BeautifulSoup(response, "lxml").findAll('item') |
46 | 64 | if record != []: |
47 | 65 | record = record[0] |
48 | 66 | url = record.find('guid').text.encode('utf-8') |
49 | 67 | oclcNum = url.replace('http://worldcat.org/oclc/','') |
50 | 68 | oclcAuthor = record.find('author').find('name').text.encode('utf-8') |
51 | 69 |
|
52 | | - response2 = requests.get(baseURL2+oclcNum+'?servicelevel=full&classificationScheme=LibraryOfCongress&wskey='+wskey).content |
| 70 | + response2 = requests.get(baseURL2+oclcNum+'?servicelevel=full&classificationScheme=LibraryOfCongress&wskey='+wskey) |
| 71 | + print 'search full record' |
| 72 | + response2 = response2.content |
53 | 73 | try: |
54 | 74 | record2 = BeautifulSoup(response2, "lxml").find('record') |
55 | 75 | try: |
|
0 commit comments