summaryrefslogtreecommitdiff
path: root/app/acmespb.py
diff options
context:
space:
mode:
Diffstat (limited to 'app/acmespb.py')
-rw-r--r--app/acmespb.py133
1 files changed, 133 insertions, 0 deletions
diff --git a/app/acmespb.py b/app/acmespb.py
new file mode 100644
index 0000000..5ebc41f
--- /dev/null
+++ b/app/acmespb.py
@@ -0,0 +1,133 @@
+import requests
+import urllib.parse
+import json
+import re
+import math
+import hashlib
+
+from bs4 import BeautifulSoup
+
# Default headers: a Referer pointing at the site and a desktop-browser
# User-Agent. They are attached to every request made through `session`.
headers = {
    'Referer': 'https://www.acmespb.ru/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36',
}

# Both http and https traffic are routed through the same local SOCKS5 proxy.
proxies = {scheme: 'socks5://127.0.0.1:1079' for scheme in ('http', 'https')}

# Divisor used by offers() to turn the total match count into a page count.
per_page = 50

# Single shared session used by every request function in this module.
session = requests.Session()
session.headers.update(headers)
session.proxies.update(proxies)
+
+
class AcmeException(Exception):
    """Raised by this module when a request to acmespb.ru does not yield the expected response."""
+
+
class AcmePharmacy:
    """A pharmacy as it appears in an offer row: name, address, phone and geo."""

    def __init__(self, name='', address='', phone='', geo=None):
        self.name = name
        self.address = address
        self.phone = phone
        self.geo = geo

    def as_dict(self):
        """Return the pharmacy's attributes as a new dict plus a 'hash' key.

        'hash' is the hex MD5 digest of "<address>|<name>" encoded as UTF-8.
        The result is a shallow copy of the instance attributes, so building
        it never adds a 'hash' attribute to (or otherwise mutates) the
        instance itself.
        """
        data = dict(vars(self))
        identity = "%s|%s" % (self.address, self.name)
        data['hash'] = hashlib.md5(identity.encode('utf-8')).hexdigest()
        return data
+
+
class AcmeOffer:
    """A single offer: product name, country, price and the pharmacy selling it."""

    def __init__(self, name='', country='', pharmacy=None, price=None):
        self.name = name
        self.country = country
        self.pharmacy = pharmacy
        self.price = price

    def as_dict(self):
        """Return the offer's attributes as a new dict.

        The 'pharmacy' entry holds the result of ``self.pharmacy.as_dict()``,
        or None when no pharmacy is set. The dict is a shallow copy of the
        instance attributes: ``self.pharmacy`` keeps pointing at the pharmacy
        object, so the method can be called repeatedly.
        """
        data = dict(vars(self))
        if self.pharmacy is not None:
            data['pharmacy'] = self.pharmacy.as_dict()
        return data
+
+
def search(query):
    """Return the autocomplete suggestions the site offers for *query*.

    Issues a GET (redirects are not followed) against the autocomplete
    endpoint with the URL-quoted query as ``term``. An empty response body
    yields an empty list; otherwise the body is decoded as UTF-8 and parsed
    as JSON.
    """
    endpoint = "https://www.acmespb.ru/lib/autocomplete.php?term="
    response = session.get(endpoint + urllib.parse.quote(query), allow_redirects=False)
    if response.text == "":
        return []

    # Force UTF-8 before re-reading the body for JSON parsing.
    response.encoding = "utf-8"
    return json.loads(response.text)
+
+
def trade_names(query):
    """Resolve *query* through the site's search form and list trade names.

    The query is POSTed to search.php without following redirects and the
    Location header of the answer is inspected.

    Returns:
        ``(location, None)`` when the redirect target does not contain
        '/trade/'; otherwise ``(None, names)`` where *names* are the strings
        of the <option> elements inside the element with id "trades",
        skipping the option whose value is "all".

    Raises:
        AcmeException: if the POST is not answered with status 301, or if the
            trade page has no element with id "trades".
    """
    url = "https://www.acmespb.ru/search.php"
    r = session.post(url, {"free_str": query}, allow_redirects=False)
    if r.status_code != 301:
        raise AcmeException("status_code is %d" % (r.status_code,))

    location = r.headers["location"]
    if '/trade/' not in location:
        return location, None

    r = session.get(location, allow_redirects=False)
    r.encoding = "utf-8"
    soup = BeautifulSoup(r.text, "html.parser")
    trades = soup.find(id="trades")
    if trades is None:
        # find() returns None when nothing matches; report it as a module
        # error instead of an AttributeError on None.
        raise AcmeException('element with id "trades" not found at %s' % (location,))

    # .get() tolerates <option> elements that carry no value attribute.
    return None, [
        opt.string
        for opt in trades.find_all("option")
        if opt.get("value") != "all"
    ]
+
+
def _get_location(query):
    """Return the Location header the search form answers with for *query*.

    The query is POSTed as ``free_str`` to search.php; the redirect itself is
    not followed.
    """
    response = session.post(
        "https://www.acmespb.ru/search.php",
        {"free_str": query},
        allow_redirects=False,
    )
    return response.headers["location"]
+
+
# Patterns used while parsing a results page (raw strings so that `\.` is a
# regex escape rather than an invalid string escape).
_COUNT_RE = re.compile(r"[0-9]+")
_PHONE_RE = re.compile(r"тел\.([^<]+)")
_GEO_RE = re.compile(r"text=([0-9.]+),([0-9.]+)")


def _parse_offer_row(trow):
    """Build an AcmeOffer from one non-header 'trow' element of a results page."""
    name = trow.select_one('.cell.name p.sra').text
    country = trow.select_one('.cell.country').text
    pharmacy_name = trow.select_one('.cell.pharm a').text
    price = float(trow.select_one('.cell.pricefull').text)

    # The address cell holds the phone number as text and a link whose text
    # is the address and whose href carries the coordinates.
    addr_div = trow.select_one('.cell.address')
    phone_match = _PHONE_RE.search(addr_div.text)
    phone = phone_match.group(1).strip() if phone_match else ''

    addr_link = addr_div.select_one('a')
    address = addr_link.text

    # A row without coordinates keeps geo=None instead of aborting the page.
    geo_match = _GEO_RE.search(addr_link.get('href', ''))
    geo = [float(part) for part in geo_match.groups()] if geo_match else None

    pharmacy = AcmePharmacy(name=pharmacy_name, address=address, phone=phone, geo=geo)
    return AcmeOffer(name=name, country=country, price=price, pharmacy=pharmacy)


def offers(query, target_url=None, page=1):
    """Fetch one page of offers for *query*.

    Args:
        query: search string, sent as ``free_str``.
        target_url: URL to POST to; when None it is obtained from
            ``_get_location(query)``.
        page: page number sent in the POST data.

    Returns:
        ``(target_url, pages, offers)`` where *pages* is the total match
        count found in the page's ``<p class="red">`` divided by ``per_page``
        and rounded up (1 when no count is present), and *offers* is a list
        of AcmeOffer objects, one per non-header 'trow' element.

    Raises:
        AcmeException: if the response status is not 200.
    """
    if target_url is None:
        target_url = _get_location(query)

    data = {
        "free_str": query,
        "page": page,
    }
    r = session.post(target_url, data, allow_redirects=False)
    r.encoding = "utf-8"
    if r.status_code != 200:
        raise AcmeException("status_code is %d, expected 200" % (r.status_code,))

    soup = BeautifulSoup(r.text, "html.parser")

    pages = 1
    counter = soup.find("p", class_="red")
    if counter is not None:
        # get_text() still works when the paragraph contains nested markup,
        # where .string would be None.
        count_match = _COUNT_RE.search(counter.get_text())
        if count_match:
            pages = math.ceil(int(count_match.group()) / per_page)

    results = [
        _parse_offer_row(trow)
        for trow in soup.find_all('div', class_='trow')
        if 'thead' not in trow['class']  # skip the table header row
    ]

    return target_url, pages, results