From d05ab59b47a25432672cfe5b7ba53f3cb1a6d6cb Mon Sep 17 00:00:00 2001 From: Evgeny Zinoviev Date: Sun, 3 Jan 2021 21:41:50 +0300 Subject: initial commit --- .gitignore | 12 +++ app/__init__.py | 91 +++++++++++++++++++++ app/acmespb.py | 133 +++++++++++++++++++++++++++++++ app/static/app.js | 191 +++++++++++++++++++++++++++++++++++++++++++++ app/static/autocomplete.js | 135 ++++++++++++++++++++++++++++++++ app/static/style.css | 17 ++++ app/templates/base.html | 39 +++++++++ app/templates/index.html | 2 + app/test.py | 15 ++++ requirements.txt | 4 + server.py | 7 ++ 11 files changed, 646 insertions(+) create mode 100644 .gitignore create mode 100644 app/__init__.py create mode 100644 app/acmespb.py create mode 100644 app/static/app.js create mode 100644 app/static/autocomplete.js create mode 100644 app/static/style.css create mode 100644 app/templates/base.html create mode 100644 app/templates/index.html create mode 100644 app/test.py create mode 100644 requirements.txt create mode 100644 server.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..685b2cd --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +*.pyc +__pycache__/ + +instance/ + +.pytest_cache/ +.coverage +htmlcov/ + +dist/ +build/ +*.egg-info/ \ No newline at end of file diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..8ee21b9 --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,91 @@ +import os +import time + +from . 
# NOTE: `import os`, `import time` and the `from .` half of the statement
# below sit at the end of the preceding line of this patch dump.
from . import acmespb
from flask import Flask, render_template
from flask_socketio import SocketIO, emit

# Socket.IO server object; it is bound to the Flask app inside create_app().
socketio = SocketIO()


def create_app(test_config=None):
    """Create the Flask application and register its route and socket events.

    Args:
        test_config: Optional mapping of configuration values. When given, it
            is loaded instead of the instance folder's ``config.py``.

    Returns:
        The configured ``Flask`` application. ``socketio`` is initialised
        against it as a side effect.
    """
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        SECRET_KEY='dev',
        DATABASE=os.path.join(app.instance_path, 'app.sqlite'),
    )

    if test_config is None:
        # load the instance config, if it exists, when not testing
        app.config.from_pyfile('config.py', silent=True)
    else:
        # load the test config if passed in
        app.config.from_mapping(test_config)

    # ensure the instance folder exists (best effort: OS errors are ignored)
    try:
        os.makedirs(app.instance_path)
    except OSError:
        pass

    socketio.init_app(app)

    @app.route('/')
    def hello():
        """Serve the single page of the application."""
        return render_template('index.html')

    @socketio.on('get_hints')
    def handle_get_hints_event(q):
        """Answer a ``get_hints`` event with autocomplete suggestions.

        ``q`` carries ``id`` (echoed back in the reply) and ``query``.
        The reply is emitted as a ``hints`` event holding either ``response``
        (the search results) or ``error`` when the query is shorter than
        three characters.
        """
        request_id = q['id']
        query = q['query']
        print('[get_hints] id=%d, query=%s' % (request_id, query))

        if len(query) < 3:
            emit('hints', {
                'id': request_id,
                'error': "query is too short"
            })
            return

        emit('hints', {
            'id': request_id,
            'response': acmespb.search(query)
        })

    @socketio.on('get_offers')
    def handle_get_offers_event(q):
        """Answer a ``get_offers`` event by streaming offers page by page.

        ``q`` carries ``id`` (echoed back in every reply) and ``query``.
        When the lookup yields a list of trade names, that list is emitted as
        a ``hints`` event and nothing else is sent. Otherwise one ``offers``
        event is emitted per result page, followed by a final ``offers`` event
        with ``end`` set to ``True``.
        """
        request_id = q['id']
        query = q['query']
        print('[get_offers] id=%d, query=%s' % (request_id, query))

        target_url, trade_names = acmespb.trade_names(query)
        if trade_names:
            emit('hints', {
                'id': request_id,
                "response": trade_names
            })
            return

        # target_url is reused when trade_names() already resolved the
        # redirect; acmespb.offers() looks it up itself when it is None.
        page = 1
        while True:
            target_url, pages, offers = acmespb.offers(query, page=page, target_url=target_url)
            print("[%d] pages=%d, target_url=%s" % (page, pages, target_url))
            emit('offers', {
                'id': request_id,
                'offers': [offer.as_dict() for offer in offers],
                'page': page,
                'pages': pages
            })

            # Stop on the reported page count. This also terminates when the
            # count is 0, which previously kept the loop running forever.
            if page >= pages:
                break

            # pause between consecutive page requests
            time.sleep(0.5)
            page += 1

        emit('offers', {
            'id': request_id,
            'end': True
        })

    # TODO empty response

    return app

# Patch metadata that closes this line of the dump (kept verbatim; the header
# continues on the next line):
# diff --git
# Patch metadata that opens this line of the dump (kept verbatim):
# a/app/acmespb.py b/app/acmespb.py new file mode 100644 index 0000000..5ebc41f --- /dev/null +++ b/app/acmespb.py @@ -0,0 +1,133 @@
"""Client for www.acmespb.ru: autocomplete, trade names and offer pages."""

import requests
import urllib.parse
import json
import re
import math
import hashlib

from bs4 import BeautifulSoup

headers = {
    'Referer': 'https://www.acmespb.ru/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36'
}
proxies = {
    'http': 'socks5://127.0.0.1:1079',
    'https': 'socks5://127.0.0.1:1079'
}
# Divisor used to turn the reported number of matches into a page count.
per_page = 50

# One shared HTTP session carrying the proxy settings and headers above.
session = requests.Session()
session.proxies.update(proxies)
session.headers.update(headers)


class AcmeException(Exception):
    """Raised when the site answers in a way this module does not expect."""


class AcmePharmacy:
    """A pharmacy: name, address, phone and geo coordinates."""

    def __init__(self, name='', address='', phone='', geo=None):
        self.name = name
        self.address = address
        self.phone = phone
        self.geo = geo

    def as_dict(self):
        """Return the attributes as a new dict plus a ``hash`` key.

        ``hash`` is the MD5 hex digest of ``"<address>|<name>"``. The result
        is a copy, so the instance itself is left unchanged.
        """
        data = dict(self.__dict__)
        data['hash'] = hashlib.md5(("%s|%s" % (self.address, self.name)).encode('utf-8')).hexdigest()
        return data


class AcmeOffer:
    """A single offer: product name, country, price and the selling pharmacy."""

    def __init__(self, name='', country='', pharmacy=None, price=None):
        self.name = name
        self.country = country
        self.pharmacy = pharmacy
        self.price = price

    def as_dict(self):
        """Return the attributes as a new dict with ``pharmacy`` expanded.

        The result is a copy: ``self.pharmacy`` stays an ``AcmePharmacy``, so
        the method can be called repeatedly. A missing pharmacy is reported
        as ``None``.
        """
        data = dict(self.__dict__)
        data['pharmacy'] = None if self.pharmacy is None else self.pharmacy.as_dict()
        return data


def search(query):
    """Return the autocomplete suggestions for ``query``.

    Returns the decoded JSON body of the autocomplete endpoint, or an empty
    list when the body is empty.
    """
    url = "https://www.acmespb.ru/lib/autocomplete.php?term=" + urllib.parse.quote(query)
    r = session.get(url, allow_redirects=False)
    r.encoding = "utf-8"

    body = r.text
    if body == "":
        return []
    return json.loads(body)


def trade_names(query):
    """Submit ``query`` to the search form and inspect the redirect.

    Returns:
        ``(location, None)`` when the redirect target does not contain
        ``/trade/``; otherwise ``(None, names)``, where ``names`` holds the
        text of every ``<option>`` of the ``#trades`` element except the one
        whose value is ``all``.

    Raises:
        AcmeException: if the form does not answer with status 301, or the
            trade page has no ``#trades`` element.
    """
    url = "https://www.acmespb.ru/search.php"
    r = session.post(url, {"free_str": query}, allow_redirects=False)
    if r.status_code != 301:
        raise AcmeException("status_code is %d" % (r.status_code,))

    location = r.headers["location"]
    if '/trade/' not in location:
        return location, None

    r = session.get(location, allow_redirects=False)
    r.encoding = "utf-8"
    soup = BeautifulSoup(r.text, "html.parser")
    trades = soup.find(id="trades")
    if trades is None:
        # previously this surfaced as an AttributeError on None
        raise AcmeException("trades element not found at %s" % (location,))
    return None, [opt.string for opt in trades.find_all("option") if opt.get("value") != "all"]


def _get_location(query):
    """Return the redirect target the search form answers with for ``query``."""
    url = "https://www.acmespb.ru/search.php"
    data = {"free_str": query}
    r = session.post(url, data, allow_redirects=False)
    return r.headers["location"]


def offers(query, target_url=None, page=1):
    """Fetch and parse one page of offers for ``query``.

    Args:
        query: The search string, posted as ``free_str``.
        target_url: URL of the result page; looked up through the search
            form's redirect when ``None``.
        page: Page number, posted as ``page``.

    Returns:
        ``(target_url, pages, offers)``: the URL that was used, the page
        count (never less than 1) and a list of ``AcmeOffer`` objects.

    Raises:
        AcmeException: if the result page does not answer with status 200.
    """
    if target_url is None:
        target_url = _get_location(query)

    data = {
        "free_str": query,
        "page": page
    }
    r = session.post(target_url, data, allow_redirects=False)
    r.encoding = "utf-8"
    if r.status_code != 200:
        raise AcmeException("status_code is %d, expected 200" % (r.status_code,))

    soup = BeautifulSoup(r.text, "html.parser")

    # The first number inside <p class="red"> is read as the total number of
    # matches. Without it a single page is assumed.
    pages = 1
    counter = soup.find("p", class_="red")
    if counter is not None:
        found = re.search(r"[0-9]+", counter.get_text())
        if found:
            # clamp to 1 so callers that page until `pages` always terminate
            pages = max(1, math.ceil(int(found.group()) / per_page))

    result = []
    for trow in soup.find_all('div', class_='trow'):
        # skip the table header row
        if 'thead' in trow['class']:
            continue

        name = trow.select_one('.cell.name p.sra').text
        country = trow.select_one('.cell.country').text
        phname = trow.select_one('.cell.pharm a').text
        price = float(trow.select_one('.cell.pricefull').text)

        # parse address, geo coordinates and phone number
        addr_div = trow.select_one('.cell.address')
        phone_match = re.search(r'тел\.([^<]+)', addr_div.text)
        # a row without a phone number no longer aborts the whole page
        phone = phone_match.group(1).strip() if phone_match else ''

        addr_link = addr_div.select_one('a')
        address = addr_link.text

        # the link's href carries the coordinates as "text=<number>,<number>"
        coords = re.findall(r'text=([0-9\.]+),([0-9\.]+)', addr_link['href'])[0]
        geo = [float(value) for value in coords]

        acmepharm = AcmePharmacy(name=phname, address=address, phone=phone, geo=geo)
        result.append(AcmeOffer(name=name, country=country, price=price, pharmacy=acmepharm))

    return target_url, pages, result

# Patch metadata and the opening of the next file in the dump (kept verbatim;
# the JavaScript continues on the following line):
# \ No newline at end of file diff --git a/app/static/app.js b/app/static/app.js new file mode 100644 index 0000000..bc4b89e --- /dev/null +++ b/app/static/app.js @@ -0,0 +1,191 @@ +class Search { + constructor() { +
this.searchDebounced = _.debounce((query) => { + if (query.length < 3) + return; + this.socket.emit('get_hints', { + id: this.updateRequestId(), + query + }); + }, 150); + + let field = document.getElementById('queryInput'); + let btn = document.getElementById('querySubmit'); + + this.autoComplete = new Autocomplete(field, { + data: [], + maximumItems: 10, + onInput: (value) => { + this.searchDebounced(value); + }, + onSelectItem: ({label}) => { + // console.log('selected:', label) + }, + highlightClass: 'text-danger' + }); + + btn.addEventListener('click', this.onSubmit); + field.addEventListener('keydown', this.onInputKeyDown); + + this.btn = btn; + this.field = field; + + this.socket = io(); + this.socket.on('hints', this.onHints); + this.socket.on('offers', this.onOffers) + } + + updateRequestId() { + this.requestId = requestId(); + return this.requestId; + } + + onInputKeyDown = (e) => { + if (e.keyCode === 10 || e.keyCode === 13) + this.onSubmit(); + } + + onSubmit = (e) => { + if (this.isLocked()) + return; + + this.lockButton('Загрузка...'); + + gMaps.removeAllPoints(); + this.socket.emit('get_offers', { + id: this.updateRequestId(), + query: this.field.value + }); + } + + onHints = (data) => { + if (data.id !== this.requestId) + return; + + this.unlockButton(); + + if (data.error) { + console.warn(data.error); + return; + } + + this.autoComplete.setData(data.response.map(item => { + return {label: item, value: ''}; + })); + this.autoComplete.renderIfNeeded(); + } + + onOffers = (data) => { + if (data.id !== this.requestId) + return; + + if (data.end) { + this.unlockButton(); + return; + } else { + this.lockButton(data.pages > 1 ? 
`${data.page} из ${data.pages}` : null); + } + + for (let offer of data.offers) + gMaps.addOffer(offer); + } + + isLocked() { + return this.btn.classList.contains('disabled'); + } + + lockButton(text) { + if (text !== null) + this.btn.innerText = text; + this.btn.classList.add('disabled'); + } + + unlockButton() { + this.btn.classList.remove('disabled'); + this.btn.innerText = 'Поиск'; + } +} + + +class Maps { + constructor() { + /** + * @type {ymaps.Map} + */ + this.map = null; + ymaps.ready(this.onInit); + + this.places = {}; + } + + onInit = () => { + this.map = new ymaps.Map("mapContainer", { + center: [59.94, 30.32], + zoom: 11 + }); + this.map.controls.remove('searchControl'); + } + + addPoint({geo, offersRef, hint, pharmacyName, pharmacyAddress, pharmacyPhone}) { + let mark = new ymaps.Placemark(geo, { + hintContent: hint, + }, { + preset: 'islands#dotIcon', + openEmptyBalloon: true, + iconColor: '#3caa3c' + }); + mark.events.add('balloonopen', e => { + let lines = offersRef.map(offer => { + return `${offer.name} (${offer.price} руб.)` + }); + let html = `${pharmacyName}
`; + html += `${pharmacyAddress}
`; + html += `тел: ${pharmacyPhone}

`; + html += lines.join('\n'); + mark.properties.set('balloonContent', html); + }); + this.map.geoObjects.add(mark); + return mark; + } + + removeAllPoints() { + this.map.geoObjects.removeAll(); + } + + addOffer(offer) { + // console.log('[addOffer]', offer); + let hash = offer.pharmacy.hash; + if (hash in this.places) + this.places[hash].offers.push(offer); + else { + this.places[hash] = { + offers: [offer], + }; + this.places[hash].mark = this.addPoint({ + geo: offer.pharmacy.geo, + hint: offer.pharmacy.name, + pharmacyName: offer.pharmacy.name, + pharmacyAddress: offer.pharmacy.address, + pharmacyPhone: offer.pharmacy.phone, + offersRef: this.places[hash].offers + }); + } + } +} + + +function requestId() { + return _.random(1, 99999999); +} + + +let gMaps, gSearch; + +window.addEventListener('DOMContentLoaded', function() { + gSearch = new Search(); + gMaps = new Maps(); + + // document.getElementById('test').addEventListener('click', () => { + // gMaps.addTestPoint(); + // }); +}); \ No newline at end of file diff --git a/app/static/autocomplete.js b/app/static/autocomplete.js new file mode 100644 index 0000000..87b42a5 --- /dev/null +++ b/app/static/autocomplete.js @@ -0,0 +1,135 @@ +const DEFAULTS = { + treshold: 2, + maximumItems: 5, + highlightTyped: true, + highlightClass: 'text-primary', +}; + +class Autocomplete { + constructor(field, options) { + this.field = field; + this.options = Object.assign({}, DEFAULTS, options); + this.dropdown = null; + + field.parentNode.classList.add('dropdown'); + field.setAttribute('data-toggle', 'dropdown'); + field.classList.add('dropdown-toggle'); + + const dropdown = ce(``); + if (this.options.dropdownClass) + dropdown.classList.add(this.options.dropdownClass); + + insertAfter(dropdown, field); + + this.dropdown = new bootstrap.Dropdown(field, this.options.dropdownOptions) + + field.addEventListener('click', (e) => { + if (this.createItems() === 0) { + // prevent show empty + e.stopPropagation(); + this.dropdown.hide(); 
+ // field.dropdown('hide'); + } + }); + + field.addEventListener('input', () => { + if (this.options.onInput) + this.options.onInput(this.field.value); + this.renderIfNeeded(); + }); + + field.addEventListener('keydown', (e) => { + if (e.keyCode === 27) { + this.dropdown.hide(); + return; + } + }); + } + + setData(data) { + this.options.data = data; + } + + renderIfNeeded() { + if (this.createItems() > 0) { + this.dropdown.show(); + // field.dropdown('show'); + } else { + // sets up positioning + this.field.click(); + } + } + + createItem(lookup, item) { + let label; + if (this.options.highlightTyped) { + const idx = item.label.toLowerCase().indexOf(lookup.toLowerCase()); + const className = Array.isArray(this.options.highlightClass) ? this.options.highlightClass.join(' ') + : (typeof this.options.highlightClass == 'string' ? this.options.highlightClass : '') + label = item.label.substring(0, idx) + + `${item.label.substring(idx, idx + lookup.length)}` + + item.label.substring(idx + lookup.length, item.label.length); + } else { + label = item.label; + } + return ce(``); + } + + createItems() { + const lookup = this.field.value; + if (lookup.length < this.options.treshold) { + this.dropdown.hide(); + // field.dropdown('hide'); + return 0; + } + + const items = this.field.nextSibling; + items.innerHTML = ''; + + let count = 0; + for (let i = 0; i < this.options.data.length; i++) { + const {label, value} = this.options.data[i]; + const item = {label, value}; + if (item.label.toLowerCase().indexOf(lookup.toLowerCase()) >= 0) { + items.appendChild(this.createItem(lookup, item)); + if (this.options.maximumItems > 0 && ++count >= this.options.maximumItems) + break; + } + } + + this.field.nextSibling.querySelectorAll('.dropdown-item').forEach((item) => { + item.addEventListener('click', (e) => { + let dataValue = e.target.getAttribute('data-value'); + this.field.value = e.target.innerText; + if (this.options.onSelectItem) { + this.options.onSelectItem({ + value: 
e.target.value, + label: e.target.innerText, + }); + } + this.dropdown.hide(); + }) + }); + + return items.childNodes.length; + } +} + +/** + * @param html + * @returns {Node} + */ +function ce(html) { + let div = document.createElement('div'); + div.innerHTML = html; + return div.firstChild; +} + +/** + * @param elem + * @param refElem + * @returns {*} + */ +function insertAfter(elem, refElem) { + return refElem.parentNode.insertBefore(elem, refElem.nextSibling) +} \ No newline at end of file diff --git a/app/static/style.css b/app/static/style.css new file mode 100644 index 0000000..303ffdc --- /dev/null +++ b/app/static/style.css @@ -0,0 +1,17 @@ +/*.acme-container {*/ +/* margin-top: 1.5rem;*/ +/*}*/ + +#test { + position: absolute; + color: #fff; + background-color: red; + opacity: 0.25; + top: 0; + right: 0; + padding: 5px 8px; + cursor: pointer; +} +#test:hover { + opacity: 1; +} \ No newline at end of file diff --git a/app/templates/base.html b/app/templates/base.html new file mode 100644 index 0000000..aeb42c6 --- /dev/null +++ b/app/templates/base.html @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + {% block title %}{% endblock %} + + +
+
+
+
+
+ + +
+
+
+
+ +
+
+
+
test
+ + \ No newline at end of file diff --git a/app/templates/index.html b/app/templates/index.html new file mode 100644 index 0000000..5cb6467 --- /dev/null +++ b/app/templates/index.html @@ -0,0 +1,2 @@ +{% extends "base.html" %} + diff --git a/app/test.py b/app/test.py new file mode 100644 index 0000000..a0481e2 --- /dev/null +++ b/app/test.py @@ -0,0 +1,15 @@ +import acmespb +import sys +from pprint import pprint + +if __name__ == "__main__": + #pprint(acmespb.trade_names("Марена красильная корневища и корни")) + page = 1 + pages = 0 + target_url = None + while pages == 0 or page <= pages: + target_url, pages, offers = acmespb.offers("Верошпирон", page=page, target_url=target_url) + print("[%d] pages=%d, target_url=%s" % (page, pages, target_url)) + for offer in offers: + print(offer.as_dict()) + page += 1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..41d652e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +requests~=2.25.1 +requests[socks] +beautifulsoup4~=4.9.3 +Flask~=1.1.2 \ No newline at end of file diff --git a/server.py b/server.py new file mode 100644 index 0000000..a243faa --- /dev/null +++ b/server.py @@ -0,0 +1,7 @@ +#!/bin/env python +from app import create_app, socketio + +app = create_app() + +if __name__ == '__main__': + socketio.run(app) -- cgit v1.2.3