summaryrefslogtreecommitdiff
path: root/app/acmespb.py
blob: 5ebc41f05349698e88da34efff379cd57d2d90df (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import requests
import urllib.parse
import json
import re
import math
import hashlib

from bs4 import BeautifulSoup

# Default HTTP headers installed on the shared session below.
headers = {
    'Referer': 'https://www.acmespb.ru/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36'
}
# Both HTTP and HTTPS traffic of the shared session goes through a SOCKS5
# proxy on localhost port 1079.
proxies = {
    'http': 'socks5://127.0.0.1:1079',
    'https': 'socks5://127.0.0.1:1079'
}
# Divisor used by offers() to turn the total match count into a page count
# (presumably the number of result rows the site returns per page -- confirm).
per_page = 50

# Module-wide session used for every request in this module; it is created
# at import time and configured with the proxies and headers above.
session = requests.Session()
session.proxies.update(proxies)
session.headers.update(headers)


class AcmeException(Exception):
    """Error raised by this module when the site returns an unexpected response."""


class AcmePharmacy:
    """A pharmacy as listed in a search result row.

    Attributes:
        name: Pharmacy name.
        address: Pharmacy address text.
        phone: Phone number text ('' by default).
        geo: Coordinates, or None by default.
    """

    def __init__(self, name='', address='', phone='', geo=None):
        self.name = name
        self.address = address
        self.phone = phone
        self.geo = geo

    def as_dict(self):
        """Return a new dict with the instance attributes plus a ``hash`` key.

        ``hash`` is the hexadecimal MD5 digest of ``"<address>|<name>"``
        encoded as UTF-8.

        The result is a shallow copy: the instance itself is not modified
        (no ``hash`` attribute is added to it) and later changes to the
        returned dict do not affect the instance.
        """
        # Copy instead of handing out self.__dict__, which would alias the
        # live attribute storage of the instance.
        data = dict(vars(self))
        identity = "%s|%s" % (self.address, self.name)
        data['hash'] = hashlib.md5(identity.encode('utf-8')).hexdigest()
        return data


class AcmeOffer:
    """A single priced offer of a product at a pharmacy.

    Attributes:
        name: Product name.
        country: Country text from the result row.
        pharmacy: Object exposing ``as_dict()`` (e.g. AcmePharmacy), or None.
        price: Price value, or None by default.
    """

    def __init__(self, name='', country='', pharmacy=None, price=None):
        self.name = name
        self.country = country
        self.pharmacy = pharmacy
        self.price = price

    def as_dict(self):
        """Return a new dict of the attributes with ``pharmacy`` serialized.

        The ``pharmacy`` entry is the result of ``self.pharmacy.as_dict()``,
        or None when no pharmacy is set.

        The instance is left untouched, so ``self.pharmacy`` remains the
        original object and the method can be called repeatedly.
        """
        # Work on a copy: writing into self.__dict__ would replace
        # self.pharmacy with a plain dict and break the next call.
        data = dict(vars(self))
        pharmacy = self.pharmacy
        data['pharmacy'] = pharmacy.as_dict() if pharmacy is not None else None
        return data


def search(query):
    """Fetch autocomplete suggestions for ``query``.

    Sends a GET request (without following redirects) to the site's
    autocomplete endpoint with the URL-quoted query as ``term``.

    Returns:
        An empty list when the response body is empty, otherwise the
        response body decoded as UTF-8 and parsed as JSON.
    """
    endpoint = "https://www.acmespb.ru/lib/autocomplete.php?term="
    response = session.get(endpoint + urllib.parse.quote(query), allow_redirects=False)
    if not response.text:
        return []

    # Force UTF-8 before the body is decoded for JSON parsing.
    response.encoding = "utf-8"
    return json.loads(response.text)


def trade_names(query):
    """Resolve ``query`` to either a direct result URL or a list of trade names.

    Posts the query to the search endpoint and inspects the redirect target.

    Returns:
        ``(location, None)`` when the redirect target does not contain
        ``/trade/``; otherwise ``(None, names)`` where ``names`` are the
        strings of the ``<option>`` elements inside the element with
        ``id="trades"``, excluding the option whose value is ``"all"``.

    Raises:
        AcmeException: if the search response is not a 301 redirect, has no
            Location header, or the trade page has no ``id="trades"`` element.
    """
    url = "https://www.acmespb.ru/search.php"
    r = session.post(url, {"free_str": query}, allow_redirects=False)
    if r.status_code != 301:
        raise AcmeException("status_code is %d" % (r.status_code,))

    location = r.headers.get("location")
    if location is None:
        raise AcmeException("redirect response has no Location header")
    if '/trade/' not in location:
        return location, None

    r = session.get(location, allow_redirects=False)
    r.encoding = "utf-8"
    soup = BeautifulSoup(r.text, "html.parser")
    trades = soup.find(id="trades")
    if trades is None:
        # Report an unexpected page layout through the module's own
        # exception type rather than an AttributeError on None.
        raise AcmeException("element with id \"trades\" not found at %s" % (location,))

    # .get() tolerates <option> elements that carry no value attribute.
    return None, [opt.string for opt in trades.find_all("option") if opt.get("value") != "all"]


def _get_location(query):
    """Post ``query`` to the search endpoint and return the Location header.

    Redirects are not followed; the value of the response's ``location``
    header is returned as-is.
    """
    response = session.post(
        "https://www.acmespb.ru/search.php",
        {"free_str": query},
        allow_redirects=False,
    )
    return response.headers["location"]


def _parse_offer_row(trow):
    """Build an AcmeOffer from one non-header ``div.trow`` result row."""
    name = trow.select_one('.cell.name p.sra').text
    country = trow.select_one('.cell.country').text
    phname = trow.select_one('.cell.pharm a').text
    price = float(trow.select_one('.cell.pricefull').text)

    # The address cell holds the phone number as text and a link whose
    # text is the address and whose href carries the coordinates.
    addr_div = trow.select_one('.cell.address')
    phones = re.findall(r'тел\.([^<]+)', addr_div.text)
    # A row without a phone number keeps AcmePharmacy's default ''.
    phone = phones[0].strip() if phones else ''

    addr_link = addr_div.select_one('a')
    address = addr_link.text

    coords = re.findall(r'text=([0-9\.]+),([0-9\.]+)', addr_link['href'])
    # A link without coordinates keeps AcmePharmacy's default None.
    geo = [float(value) for value in coords[0]] if coords else None

    acmepharm = AcmePharmacy(name=phname, address=address, phone=phone, geo=geo)
    return AcmeOffer(name=name, country=country, price=price, pharmacy=acmepharm)


def offers(query, target_url=None, page=1):
    """Fetch and parse one page of offers for ``query``.

    Args:
        query: Search string, sent as ``free_str``.
        target_url: URL to post to; when None it is resolved by posting the
            query to the search endpoint and taking the redirect location.
        page: Page number sent with the request.

    Returns:
        A tuple ``(target_url, pages, offers)``: the URL that was queried,
        the page count derived from the total shown in the ``p.red``
        element (1 when that element or a number in it is absent), and a
        list of AcmeOffer objects, one per non-header ``div.trow``.

    Raises:
        AcmeException: if the response status code is not 200.
    """
    if target_url is None:
        target_url = _get_location(query)

    data = {
        "free_str": query,
        "page": page
    }
    r = session.post(target_url, data, allow_redirects=False)
    r.encoding = "utf-8"
    if r.status_code != 200:
        raise AcmeException("status_code is %d, expected 200" % (r.status_code,))

    pages = 1

    soup = BeautifulSoup(r.text, "html.parser")
    p = soup.find("p", class_="red")
    if p is not None:
        # get_text() also works when the <p> contains nested markup, where
        # .string would be None.
        counts = re.findall(r"[0-9]+", p.get_text())
        if counts:
            pages = math.ceil(int(counts[0]) / per_page)

    results = [
        _parse_offer_row(trow)
        for trow in soup.find_all('div', class_='trow')
        if 'thead' not in trow['class']  # skip the table header row
    ]

    return target_url, pages, results