diff options
Diffstat (limited to 'app/acmespb.py')
-rw-r--r-- | app/acmespb.py | 133 |
1 files changed, 0 insertions, 133 deletions
diff --git a/app/acmespb.py b/app/acmespb.py deleted file mode 100644 index 5ebc41f..0000000 --- a/app/acmespb.py +++ /dev/null @@ -1,133 +0,0 @@ -import requests -import urllib.parse -import json -import re -import math -import hashlib - -from bs4 import BeautifulSoup - -headers = { - 'Referer': 'https://www.acmespb.ru/', - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36' -} -proxies = { - 'http': 'socks5://127.0.0.1:1079', - 'https': 'socks5://127.0.0.1:1079' -} -per_page = 50 - -session = requests.Session() -session.proxies.update(proxies) -session.headers.update(headers) - - -class AcmeException(Exception): - pass - - -class AcmePharmacy: - def __init__(self, name='', address='', phone='', geo=None): - self.name = name - self.address = address - self.phone = phone - self.geo = geo - - def as_dict(self): - dict = self.__dict__ - dict['hash'] = hashlib.md5(("%s|%s" % (self.address, self.name)).encode('utf-8')).hexdigest() - return dict - - -class AcmeOffer: - def __init__(self, name='', country='', pharmacy=None, price=None): - self.name = name - self.country = country - self.pharmacy = pharmacy - self.price = price - - def as_dict(self): - dict = self.__dict__ - dict['pharmacy'] = self.pharmacy.as_dict() - return dict - - -def search(query): - url = "https://www.acmespb.ru/lib/autocomplete.php?term=" + urllib.parse.quote(query) - r = session.get(url, allow_redirects=False) - if r.text == "": - return [] - - r.encoding = "utf-8" - return json.loads(r.text) - - -def trade_names(query): - url = "https://www.acmespb.ru/search.php" - r = session.post(url, {"free_str": query}, allow_redirects=False) - if r.status_code != 301: - raise AcmeException("status_code is %d" % (r.status_code,)) - if '/trade/' not in r.headers["location"]: - return r.headers["location"], None - - r = session.get(r.headers["location"], allow_redirects=False) - r.encoding = "utf-8" - soup = BeautifulSoup(r.text, "html.parser") - trades = soup.find(id="trades") - return None, [opt.string for opt in trades.find_all("option") if opt["value"] != "all"] - - -def _get_location(query): - url = "https://www.acmespb.ru/search.php" - data = {"free_str": query} - r = session.post(url, data, allow_redirects=False) - return r.headers["location"] - - -def offers(query, target_url=None, page=1): - if target_url is None: - target_url = _get_location(query) - - data = { - "free_str": query, - "page": page - } - r = session.post(target_url, data, allow_redirects=False) - r.encoding = "utf-8" - if r.status_code != 200: - raise AcmeException("status_code is %d, expected 200" % (r.status_code,)) - - pages = 1 - - soup = BeautifulSoup(r.text, "html.parser") - p = soup.find("p", class_="red") - if p: - total_matches = int(re.findall("([0-9]+)", p.string)[0]) - pages = math.ceil(total_matches / per_page) - - offers = [] - for trow in soup.find_all('div', class_='trow'): - if 'thead' in trow['class']: - continue - - name = trow.select_one('.cell.name p.sra').text - country = trow.select_one('.cell.country').text - phname = trow.select_one('.cell.pharm a').text - price = float(trow.select_one('.cell.pricefull').text) - - # parse address, geo coordinates and phone number - addr_div = trow.select_one('.cell.address') - phone = re.findall('тел\.([^<]+)', addr_div.text)[0].strip() - - addr_link = addr_div.select_one('a') - address = addr_link.text - - geo = re.findall('text=([0-9\.]+),([0-9\.]+)', addr_link['href'])[0] - geo = list(map(lambda x: float(x), geo)) - - acmepharm = AcmePharmacy(name=phname, address=address, phone=phone, geo=geo) - acmeoffer = AcmeOffer(name=name, country=country, price=price, pharmacy=acmepharm) - - offers.append(acmeoffer) - - return target_url, pages, offers
\ No newline at end of file |