Skip to content
Snippets Groups Projects
Commit 56523ccc authored by fmk17
Browse files

Add docker, fix menu crawling

parent 6a212abc
No related branches found
No related tags found
No related merge requests found
.mypy_cache
__pycache__
......@@ -7,7 +7,7 @@ repos:
- id: trailing-whitespace
# Black formats the Python code
- repo: https://github.com/psf/black
rev: 22.1.0
rev: 22.3.0
hooks:
- id: black
# Flake8 lints the Python code
......
# Runtime image: runs main.py under python3 on Debian bullseye.
FROM debian:bullseye
WORKDIR /usr/src/ru-bot-telegram
# Update, install and clean up in one layer so the apt package lists
# downloaded by `apt-get update` are not kept in the image.
RUN apt-get update && \
    apt-get install -y \
        python3-pip entr && \
    rm -rf /var/lib/apt/lists/*
# requirements.txt is copied on its own before the rest of the tree so the
# pip layer below can be served from the build cache until it changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt
COPY . .
CMD ["python3", "main.py"]
import logging
import re
from collections import namedtuple
from datetime import date, datetime, timedelta
from itertools import dropwhile
from typing import Dict, List, Set
import requests
......@@ -15,6 +17,9 @@ LocationDays = namedtuple("LocationDays", "days, location, update_datetime")
cached_update_times: Dict[Location, datetime] = dict()
cached_responses: Dict[Location, LocationDays] = dict()
DATE_REGEX = r": (\d{1,2})\/(\d{1,2})\/(\d{4})"
logger = logging.getLogger("crawler")
def get_location_days(
location: Location, menu_item_indicators: List[MenuItemIndicator]
......@@ -35,12 +40,15 @@ def get_location_days(
post = soup.select_one("#post div:nth-child(3)")
post_children = iter(post.children)
post_children = (node for node in post_children if node.text.strip() != "")
post_children = dropwhile(
lambda n: not re.search(DATE_REGEX, n.text), post_children
)
days = []
for date_node, table_node in zip(post_children, post_children):
date_text = date_node.text
date_re = re.search(r"(\d{1,2})\/(\d{1,2})\/(\d{4})", date_text)
date_re = re.search(DATE_REGEX, date_text)
if date_re is None:
break
d, m, y = map(int, date_re.groups())
......@@ -53,7 +61,10 @@ def get_location_days(
for child in item_nodes:
if child.name == "br" and item_name is not None:
items.append(
MenuItem(name=item_name, indicators=item_indicators)
MenuItem(
name=item_name,
indicators=frozenset(item_indicators),
)
)
item_name = None
item_indicators = set()
......@@ -75,9 +86,11 @@ def get_location_days(
item_name = child.text.strip()
if item_name is not None:
items.append(
MenuItem(name=item_name, indicators=item_indicators)
MenuItem(
name=item_name, indicators=frozenset(item_indicators)
)
)
menus.append(Menu(meal_name=title_node.text, items=items))
menus.append(Menu(meal_name=title_node.text, items=tuple(items)))
days.append(Day(date=date(y, m, d), date_raw=date_text, menus=menus))
cached_update_times[location] = datetime.now()
......
# NOTE(review): presumably the development variant of the Compose setup
# (live source mount + auto-restart on change) — confirm against the repo.
version: "3.9"
services:
  app:
    # Bind-mount the current directory at /usr/src/ru-bot-telegram inside
    # the container.
    volumes:
      - ".:/usr/src/ru-bot-telegram"
    # Pipe the list of *.py files to entr; with -r it restarts
    # `python3 main.py` whenever one of those files changes.
    command: ["bash", "-c", "ls *.py | entr -r python3 main.py"]
    restart: unless-stopped
version: "3.9"
services:
  app:
    # Build the service image using the current directory as build context.
    build: .
from dataclasses import dataclass
from datetime import datetime, time
from enum import Enum
from typing import Set
from typing import FrozenSet
class WeekDay(Enum):
......@@ -52,7 +52,7 @@ class MenuItemIndicator:
@dataclass(frozen=True)
class MenuItem:
    """A named menu item together with the indicators attached to it.

    The dataclass is frozen, so instances get a generated ``__hash__`` that
    hashes the field values. ``indicators`` is therefore a ``FrozenSet``:
    a plain mutable ``set`` is unhashable and would make ``hash(item)``
    raise ``TypeError``.
    """

    # Display name of the item.
    name: str
    # Immutable, hashable collection of the item's indicators.
    indicators: FrozenSet[MenuItemIndicator]
@dataclass(frozen=True)
......
......@@ -16,4 +16,4 @@ tornado==6.1
tzdata==2021.5
tzlocal==4.1
urllib3==1.26.8
lxml==4.8.0
lxml==4.9.0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment