"""Minimal scraping client for www.acmespb.ru: autocomplete suggestions,
trade-name lookup and per-pharmacy drug offers."""
import requests
import urllib.parse
import json
import re
import math
import hashlib
from bs4 import BeautifulSoup

# Browser-like headers; the site may reject requests without a Referer/User-Agent.
headers = {
    'Referer': 'https://www.acmespb.ru/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36'
}

# Route all traffic through a local SOCKS5 proxy (requires the requests[socks] extra).
proxies = {
    'http': 'socks5://127.0.0.1:1079',
    'https': 'socks5://127.0.0.1:1079'
}

# Number of offers the site shows per results page; used to derive the page count.
per_page = 50

session = requests.Session()
session.proxies.update(proxies)
session.headers.update(headers)

class AcmeException(Exception):
    """Raised when the site responds in an unexpected way."""
    pass


class AcmePharmacy:
    def __init__(self, name='', address='', phone='', geo=None):
        self.name = name
        self.address = address
        self.phone = phone
        self.geo = geo

    def as_dict(self):
        # Work on a copy so the instance itself is not mutated.
        d = dict(self.__dict__)
        # Stable identifier for deduplicating pharmacies across offers.
        d['hash'] = hashlib.md5(("%s|%s" % (self.address, self.name)).encode('utf-8')).hexdigest()
        return d

class AcmeOffer:
    def __init__(self, name='', country='', pharmacy=None, price=None):
        self.name = name
        self.country = country
        self.pharmacy = pharmacy
        self.price = price

    def as_dict(self):
        # Copy before replacing the pharmacy object with its dict form.
        d = dict(self.__dict__)
        d['pharmacy'] = self.pharmacy.as_dict()
        return d

def search(query):
    """Query the site's autocomplete endpoint and return a list of suggestions."""
    url = "https://www.acmespb.ru/lib/autocomplete.php?term=" + urllib.parse.quote(query)
    r = session.get(url, allow_redirects=False)
    r.encoding = "utf-8"
    if r.text == "":
        return []
    return json.loads(r.text)

def trade_names(query):
    """Resolve a search query to a list of trade names.

    Returns (location, None) when the site redirects straight to a results
    page, or (None, names) when it redirects to a /trade/ disambiguation page.
    """
    url = "https://www.acmespb.ru/search.php"
    r = session.post(url, {"free_str": query}, allow_redirects=False)
    # The search POST is always answered with a 301 redirect.
    if r.status_code != 301:
        raise AcmeException("status_code is %d, expected 301" % (r.status_code,))
    if '/trade/' not in r.headers["location"]:
        # Not a disambiguation page: hand back the results URL as-is.
        return r.headers["location"], None
    r = session.get(r.headers["location"], allow_redirects=False)
    r.encoding = "utf-8"
    soup = BeautifulSoup(r.text, "html.parser")
    # The <select id="trades"> element lists the trade names; skip the "all" option.
    trades = soup.find(id="trades")
    return None, [opt.string for opt in trades.find_all("option") if opt["value"] != "all"]

def _get_location(query):
    """Post the query to the search form and return the redirect target URL."""
    url = "https://www.acmespb.ru/search.php"
    data = {"free_str": query}
    r = session.post(url, data, allow_redirects=False)
    return r.headers["location"]

def offers(query, target_url=None, page=1):
    """Scrape one results page and return (target_url, pages, offers)."""
    if target_url is None:
        target_url = _get_location(query)
    data = {
        "free_str": query,
        "page": page
    }
    r = session.post(target_url, data, allow_redirects=False)
    r.encoding = "utf-8"
    if r.status_code != 200:
        raise AcmeException("status_code is %d, expected 200" % (r.status_code,))
    pages = 1
    soup = BeautifulSoup(r.text, "html.parser")
    # The total number of matches is printed in a <p class="red"> element;
    # derive the page count from it.
    p = soup.find("p", class_="red")
    if p:
        total_matches = int(re.findall("([0-9]+)", p.string)[0])
        pages = math.ceil(total_matches / per_page)
    results = []
    for trow in soup.find_all('div', class_='trow'):
        # Skip the header row of the results table.
        if 'thead' in trow['class']:
            continue
        name = trow.select_one('.cell.name p.sra').text
        country = trow.select_one('.cell.country').text
        phname = trow.select_one('.cell.pharm a').text
        price = float(trow.select_one('.cell.pricefull').text)
        # Parse the address, geo coordinates and phone number.
        addr_div = trow.select_one('.cell.address')
        phone = re.findall(r'тел\.([^<]+)', addr_div.text)[0].strip()
        addr_link = addr_div.select_one('a')
        address = addr_link.text
        # Coordinates are embedded in the map link as "text=<lat>,<lon>".
        geo = re.findall(r'text=([0-9.]+),([0-9.]+)', addr_link['href'])[0]
        geo = [float(x) for x in geo]
        pharmacy = AcmePharmacy(name=phname, address=address, phone=phone, geo=geo)
        results.append(AcmeOffer(name=name, country=country, price=price, pharmacy=pharmacy))
    return target_url, pages, results
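

# Example usage (a minimal sketch, not part of the scraper itself): fetch the
# first results page for a query, then walk the remaining pages and dump all
# collected offers as JSON. The query string "аспирин" is only an illustrative
# placeholder; substitute any drug name the site recognises.
if __name__ == "__main__":
    query = "аспирин"
    target_url, pages, all_offers = offers(query)
    for page in range(2, pages + 1):
        _, _, page_offers = offers(query, target_url=target_url, page=page)
        all_offers.extend(page_offers)
    print(json.dumps([o.as_dict() for o in all_offers], ensure_ascii=False, indent=2))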