-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbookScraper.py
More file actions
147 lines (117 loc) · 4.14 KB
/
bookScraper.py
File metadata and controls
147 lines (117 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import argparse
import logging
import math
import os
import re
from urllib.parse import unquote
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from libGen import Library
# Configure the root logger once at import time: timestamped records, and only
# WARNING and above are emitted (the debug/info calls below stay silent).
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.WARNING)
def check_isbn(isbn):
    """Return True if *isbn* is a valid ISBN-10 or ISBN-13, otherwise False.

    The input is converted with ``str()`` first, so integers are accepted as
    well as strings. Separators such as hyphens or spaces are not stripped;
    any input that is not exactly 10 or 13 characters of the expected form
    is reported as invalid rather than raising.

    Args:
        isbn: Candidate ISBN (string, integer, or any object; non-ISBN
            objects simply yield False).

    Returns:
        bool: True when the checksum for the detected ISBN length is valid.
    """
    text = str(isbn)

    # Validate the shape with an explicit ASCII-digit pattern. ``int(text)``
    # is not a safe pre-check: it accepts signs, whitespace and underscores
    # that a per-character conversion would then choke on.
    if re.fullmatch(r"[0-9]{9}[0-9Xx]", text):
        # ISBN-10: weights run 10, 9, ..., 1 and the final character may be
        # 'X', which stands for the value 10.
        values = [int(char) for char in text[:9]]
        values.append(10 if text[9] in "Xx" else int(text[9]))
        weighted = sum(
            weight * value
            for weight, value in zip(range(10, 0, -1), values)
        )
        return weighted % 11 == 0

    if re.fullmatch(r"[0-9]{13}", text):
        # ISBN-13: digits at even indices weigh 1, digits at odd indices 3.
        weighted = sum(
            int(char) * (3 if index % 2 else 1)
            for index, char in enumerate(text)
        )
        return weighted % 10 == 0

    return False
def get_books(isbn=None, title=None, author=None):
    """Search the library by ISBN, title, or author and look up the matches.

    Only one search is performed. The first truthy argument wins, in the
    order ``isbn``, ``title``, ``author``.

    Args:
        isbn: ISBN to search for.
        title: Title to search for (used only when ``isbn`` is falsy).
        author: Author to search for (used only when ``isbn`` and ``title``
            are falsy).

    Returns:
        The result of ``Library.lookup`` for the IDs found, or an empty list
        when no search term was supplied.
    """
    if isbn:
        label, query, mode = "ISBN", isbn, "isbn"
    elif title:
        label, query, mode = "title", title, "title"
    elif author:
        label, query, mode = "author", author, "author"
    else:
        # Nothing to search for; no need to create a Library at all.
        return []

    lib = Library()
    # Log the term that is actually being searched (title with title,
    # author with author).
    logging.debug(f"Searching for {label}: {query}")
    ids = lib.search(query, mode=mode)
    logging.debug(f"IDs Found: {ids}")
    return lib.lookup(ids)
def download_books(md5, mirror='http://93.174.95.29/main/'):
    """Download the file identified by *md5* from *mirror* into the working directory.

    The mirror page at ``mirror + md5`` is fetched and the anchor whose text
    is ``GET`` is followed. The target is streamed to a file named after the
    last (URL-decoded) path segment of that link. Nothing is downloaded when
    a file of that name already exists in the current working directory.

    Args:
        md5: Identifier appended to ``mirror`` to form the mirror page URL.
        mirror: Base URL of the mirror page.

    Raises:
        requests.exceptions.HTTPError: If the mirror page or the file request
            comes back with an HTTP error status.
    """
    page = requests.get(mirror + md5)
    # Surface HTTP failures as HTTPError instead of trying to parse an
    # error page.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    anchor = soup.find('a', href=True, text='GET')
    if anchor is None:
        logging.error(f"No download link found for: {md5}")
        return
    link = anchor['href']

    # The name comes from the remote page; after unquoting it may contain
    # path separators, so keep only the final component.
    filename = os.path.basename(unquote(link.split("/")[-1]))
    if not filename:
        logging.error(f"Could not determine a file name from link: {link}")
        return

    # Check before issuing the (potentially large) download request, and stop
    # here so an existing file is not also reported as a download error.
    if os.path.exists(os.path.join(os.getcwd(), filename)):
        logging.error("File already exists")
        return

    block_size = 1024
    wrote = 0
    # The context manager releases the streamed connection when done.
    with requests.get(link, stream=True) as r:
        r.raise_for_status()
        total_size = int(r.headers.get('content-length', 0))
        # True division so a trailing partial block is counted; None when the
        # size is unknown so the progress bar has no bogus total of 0.
        expected_blocks = math.ceil(total_size / block_size) if total_size else None
        with open(filename, 'wb') as f:
            for data in tqdm(r.iter_content(block_size),
                             total=expected_blocks,
                             unit='KB',
                             unit_scale=True,
                             unit_divisor=1024,
                             smoothing=0.1):
                wrote += len(data)
                f.write(data)

    if total_size != 0 and wrote != total_size:
        logging.error("Download error")
    else:
        logging.info(f"File downloaded successfully: {filename}")
def _download_by_isbn(isbn):
    """Search for *isbn* and download the first match, logging any failure."""
    try:
        # Take the first result if there is one; an empty result must not
        # blow up with an unpacking error.
        book = next(iter(get_books(isbn=isbn) or ()), None)
        if book is None:
            logging.error(f"Book not found for ISBN: {isbn}")
            return
        download_books(book.__dict__['md5'])
    except requests.exceptions.HTTPError as e:
        if "500" in str(e.response):
            logging.error(f"Book not found for ISBN: {isbn}")
        else:
            logging.error(str(e.response))


def main():
    """Command-line entry point: download books for one ISBN or a file of ISBNs.

    The single positional argument is either an all-digit string, which is
    searched directly, or the path of a text file. For a file, every word
    that passes ``check_isbn`` is searched and downloaded in turn; a failure
    for one ISBN is logged and the remaining ones are still processed.
    Anything else is reported as invalid input.
    """
    parser = argparse.ArgumentParser(description="Downloads books based on ISBN")
    parser.add_argument("isbn", help="ISBN of book to download")
    args = parser.parse_args()

    if args.isbn.isdigit():
        _download_by_isbn(args.isbn)
    elif os.path.isfile(args.isbn):
        with open(args.isbn) as f:
            contents = f.read()
        # Split the file into word-like tokens and keep the valid ISBNs.
        candidates = re.findall(r"[\w']+", contents)
        isbns = [x for x in candidates if check_isbn(x)]
        logging.debug(f"ISBNs: {isbns}")
        for isbn in isbns:
            _download_by_isbn(isbn)
    else:
        logging.critical("Invalid input. Please input an ISBN or text file containing a list of ISBNs.")
def test():
    """Self-check for ``check_isbn`` using known valid and invalid samples.

    Raises:
        AssertionError: If a valid sample is rejected or an invalid sample
            is accepted.
    """
    # (expected verdict, failure message, samples) - valid samples first.
    scenarios = (
        (
            True,
            "Valid ISBN was found invalid.",
            ["9781566199094", 9781566199094, "1566199093", 1566199093],
        ),
        (
            False,
            "Invalid ISBN was found valid.",
            ["6937", "29323478523452983", "hello world!", 51, 582863, (5, 2), 4915834592],
        ),
    )
    for expected, failure_message, samples in scenarios:
        for sample in samples:
            assert bool(check_isbn(sample)) is expected, failure_message
# Run the command-line downloader only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
    # To run the ISBN self-checks instead, call test() here.