-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathWatchOffer.py
More file actions
73 lines (55 loc) · 2.25 KB
/
WatchOffer.py
File metadata and controls
73 lines (55 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from datetime import datetime
import bs4
from requests_html import HTMLSession
class WatchOffer:
    """A watch listing together with details scraped from its offer page.

    The constructor stores the values it is given, records the creation
    time, and immediately calls :meth:`fetch_details`, which downloads and
    renders ``url`` to fill in the remaining attributes.

    Attributes set from the constructor arguments: ``url``, ``title``,
    ``price``, ``currency``. ``timestamp`` is ``datetime.now()`` at
    construction time.

    Attributes filled by :meth:`fetch_details` (each stays ``None`` when the
    corresponding information is not found on the page): ``condition``,
    ``productionyear``, ``has_original_box``, ``has_original_papers``,
    ``location``, ``description``.
    """

    def __init__(self, url: str, title: str, price: int, currency: str):
        """Store the listing data and fetch the detail page right away.

        Note that construction performs network I/O via ``fetch_details``.
        """
        self.timestamp = datetime.now()
        self.url = url
        self.title = title
        self.currency = currency
        self.price = price
        # Details below are unknown until fetch_details() has parsed the page.
        self.condition = None
        self.productionyear = None
        self.has_original_box = None
        self.has_original_papers = None
        self.location = None
        self.description = None
        self.fetch_details()

    def fetch_details(self):
        """Download, render and parse the offer page at ``self.url``.

        The page's ``<main>`` element is searched for the section with id
        ``jq-specifications``. Its first table is read as the specification
        table; its second table, when present, is read as the description
        table. If the section or the specification table is missing, a
        message is printed and the detail attributes are left unchanged.
        """
        print("fetching:", self.url)
        # Closing the session also shuts down the browser that render()
        # starts (per the requests-html docs); without the context manager
        # every offer would leave one behind.
        with HTMLSession() as session:
            doc = session.get(self.url)
            doc.html.render(timeout=60)
            raw_html = doc.html.raw_html

        page_content = bs4.BeautifulSoup(raw_html, "lxml")
        main_section = page_content.find("main")
        specs_section = None
        if main_section is not None:
            specs_section = main_section.find(
                "section", {"id": "jq-specifications"}
            )
        if specs_section is None:
            print("error while parsing specifications:", self.url)
            return

        tables = specs_section.find_all("table")
        if not tables:
            print("error while parsing specifications:", self.url)
            return

        self._parse_specs(tables[0])
        # The description table is optional; its absence must not discard
        # the specification values parsed above.
        if len(tables) >= 2:
            self._parse_description(tables[1])

    def _parse_specs(self, specs):
        """Fill the detail attributes from the rows of the specs table."""
        for row in specs.find_all("tr"):
            cells = row.find_all("td")
            # A data row needs a label cell and a value cell; anything
            # shorter cannot carry a value and is skipped.
            if len(cells) < 2:
                continue
            label = row.text
            value = cells[1]
            if "Zustand" in label:
                # Prefer the link text; fall back to the whole cell when the
                # value is not wrapped in a link.
                link = value.find("a")
                source = link if link is not None else value
                self.condition = source.text.strip()
            elif "Lieferumfang" in label:
                box, papers = self._parse_delivery_scope(value.text)
                # Only overwrite what this row actually mentions.
                if box is not None:
                    self.has_original_box = box
                if papers is not None:
                    self.has_original_papers = papers
            elif "Herstellungsjahr" in label:
                self.productionyear = value.text.strip()
            elif "Standort" in label:
                self.location = value.text.strip()

    def _parse_description(self, desc):
        """Set ``description`` from the second row of the description table."""
        desc_rows = desc.find_all("tr")
        if len(desc_rows) >= 2:
            self.description = desc_rows[1].text.strip()

    @staticmethod
    def _parse_delivery_scope(content: str):
        """Interpret the comma-separated scope-of-delivery text.

        Each element is matched case-insensitively. An element containing
        ``original-box`` or ``original-papiere`` sets the respective flag to
        whether the element also contains ``mit``. Any other element prints
        a parse error.

        Returns a ``(has_original_box, has_original_papers)`` tuple in which
        a value is ``None`` when the text does not mention that item.
        """
        has_box = None
        has_papers = None
        for element in content.strip().lower().split(","):
            included = "mit" in element
            if "original-box" in element:
                has_box = included
            elif "original-papiere" in element:
                has_papers = included
            else:
                print("error while parsing box and papers")
        return has_box, has_papers