diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..0c4323f054d0fb145a13731223fdc48780485bc9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +.mypy_cache +__pycache__ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f79bcf3886300ec6e066f1d2ea852e8f3497e58c..82777341876d47c6e994bd562b18f4fbc7bc9149 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: - id: trailing-whitespace # Black formats the Python code - repo: https://github.com/psf/black - rev: 22.1.0 + rev: 22.3.0 hooks: - id: black # Flake8 lints the Python code diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..983dc4fe79378846a843a54788888bfb9ce4a96c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM debian:bullseye +WORKDIR /usr/src/ru-bot-telegram +RUN apt-get update && \ + apt-get install -y \ + python3-pip entr +COPY requirements.txt . +RUN pip3 install --no-cache-dir -r requirements.txt +COPY . . +CMD ["python3", "main.py"] diff --git a/crawler.py b/crawler.py index 417e139004cda71ed053290767f2c40d79d2c364..6af929f24cc63cf0dad4f88e9e85e72b47e9cfbe 100644 --- a/crawler.py +++ b/crawler.py @@ -1,6 +1,8 @@ +import logging import re from collections import namedtuple from datetime import date, datetime, timedelta +from itertools import dropwhile from typing import Dict, List, Set import requests @@ -15,6 +17,9 @@ LocationDays = namedtuple("LocationDays", "days, location, update_datetime") cached_update_times: Dict[Location, datetime] = dict() cached_responses: Dict[Location, LocationDays] = dict() +DATE_REGEX = r": (\d{1,2})\/(\d{1,2})\/(\d{4})" +logger = logging.getLogger("crawler") + def get_location_days( location: Location, menu_item_indicators: List[MenuItemIndicator] @@ -35,12 +40,15 @@ def get_location_days( post = soup.select_one("#post div:nth-child(3)") post_children = iter(post.children) post_children = (node for node in post_children if node.text.strip() != "") + post_children = dropwhile( + lambda n: not re.search(DATE_REGEX, n.text), post_children + ) days = [] for date_node, table_node in zip(post_children, post_children): date_text = date_node.text - date_re = re.search(r"(\d{1,2})\/(\d{1,2})\/(\d{4})", date_text) + date_re = re.search(DATE_REGEX, date_text) if date_re is None: break d, m, y = map(int, date_re.groups()) @@ -53,7 +61,10 @@ def get_location_days( for child in item_nodes: if child.name == "br" and item_name is not None: items.append( - MenuItem(name=item_name, indicators=item_indicators) + MenuItem( + name=item_name, + indicators=frozenset(item_indicators), + ) ) item_name = None item_indicators = set() @@ -75,9 +86,11 @@ def get_location_days( item_name = child.text.strip() if item_name is not None: items.append( - MenuItem(name=item_name, indicators=item_indicators) + MenuItem( + name=item_name, indicators=frozenset(item_indicators) + ) ) - menus.append(Menu(meal_name=title_node.text, items=items)) + menus.append(Menu(meal_name=title_node.text, items=tuple(items))) days.append(Day(date=date(y, m, d), date_raw=date_text, menus=menus)) cached_update_times[location] = datetime.now() diff --git a/docker-compose.override.yml b/docker-compose.override.yml new file mode 100644 index 0000000000000000000000000000000000000000..3dd65821115876991123364d837a414c4a578d1a --- /dev/null +++ b/docker-compose.override.yml @@ -0,0 +1,7 @@ +version: "3.9" +services: + app: + volumes: + - ".:/usr/src/ru-bot-telegram" + command: ["bash", "-c", "ls *.py | entr -r python3 main.py"] + restart: unless-stopped diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..3de3267eea82b955993b422bfff5c3280d02aa47 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,4 @@ +version: "3.9" +services: + app: + build: . diff --git a/model.py b/model.py index 81a3efd909165323e9a4c7ca61fedb2a226872bc..583d629932a1b88a2639f089e8dc9a906e0a4263 100644 --- a/model.py +++ b/model.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from datetime import datetime, time from enum import Enum -from typing import Set +from typing import FrozenSet class WeekDay(Enum): @@ -52,7 +52,7 @@ class MenuItemIndicator: @dataclass(frozen=True) class MenuItem: name: str - indicators: Set[MenuItemIndicator] + indicators: FrozenSet[MenuItemIndicator] @dataclass(frozen=True) diff --git a/requirements.txt b/requirements.txt index 2e8f5e94c1dd3fd747dbfa97dd5095247ad6ad8c..a1ff01c0b07dc6166384c21dc39dbfb880ca2806 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,4 @@ tornado==6.1 tzdata==2021.5 tzlocal==4.1 urllib3==1.26.8 -lxml==4.8.0 +lxml==4.9.0