From 56523ccc454697e4523a94654750ad82dea798c6 Mon Sep 17 00:00:00 2001 From: fmk17 <fmk17@inf.ufpr.br> Date: Tue, 7 Jun 2022 11:11:43 -0300 Subject: [PATCH] Add docker, fix menu crawling --- .dockerignore | 2 ++ .pre-commit-config.yaml | 2 +- Dockerfile | 9 +++++++++ crawler.py | 21 +++++++++++++++++---- docker-compose.override.yml | 7 +++++++ docker-compose.yml | 4 ++++ model.py | 4 ++-- requirements.txt | 2 +- 8 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 docker-compose.override.yml create mode 100644 docker-compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..0c4323f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +.mypy_cache +__pycache__ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f79bcf3..8277734 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: - id: trailing-whitespace # Black formats the Python code - repo: https://github.com/psf/black - rev: 22.1.0 + rev: 22.3.0 hooks: - id: black # Flake8 lints the Python code diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..983dc4f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM debian:bullseye +WORKDIR /usr/src/ru-bot-telegram +RUN apt-get update && \ + apt-get install -y \ + python3-pip entr +COPY requirements.txt . +RUN pip3 install --no-cache-dir -r requirements.txt +COPY . . +CMD ["python3", "main.py"] diff --git a/crawler.py b/crawler.py index 417e139..6af929f 100644 --- a/crawler.py +++ b/crawler.py @@ -1,6 +1,8 @@ +import logging import re from collections import namedtuple from datetime import date, datetime, timedelta +from itertools import dropwhile from typing import Dict, List, Set import requests @@ -15,6 +17,9 @@ LocationDays = namedtuple("LocationDays", "days, location, update_datetime") cached_update_times: Dict[Location, datetime] = dict() cached_responses: Dict[Location, LocationDays] = dict() +DATE_REGEX = r": (\d{1,2})\/(\d{1,2})\/(\d{4})" +logger = logging.getLogger("crawler") + def get_location_days( location: Location, menu_item_indicators: List[MenuItemIndicator] @@ -35,12 +40,15 @@ def get_location_days( post = soup.select_one("#post div:nth-child(3)") post_children = iter(post.children) post_children = (node for node in post_children if node.text.strip() != "") + post_children = dropwhile( + lambda n: not re.search(DATE_REGEX, n.text), post_children + ) days = [] for date_node, table_node in zip(post_children, post_children): date_text = date_node.text - date_re = re.search(r"(\d{1,2})\/(\d{1,2})\/(\d{4})", date_text) + date_re = re.search(DATE_REGEX, date_text) if date_re is None: break d, m, y = map(int, date_re.groups()) @@ -53,7 +61,10 @@ def get_location_days( for child in item_nodes: if child.name == "br" and item_name is not None: items.append( - MenuItem(name=item_name, indicators=item_indicators) + MenuItem( + name=item_name, + indicators=frozenset(item_indicators), + ) ) item_name = None item_indicators = set() @@ -75,9 +86,11 @@ def get_location_days( item_name = child.text.strip() if item_name is not None: items.append( - MenuItem(name=item_name, indicators=item_indicators) + MenuItem( + name=item_name, indicators=frozenset(item_indicators) + ) ) - menus.append(Menu(meal_name=title_node.text, items=items)) + menus.append(Menu(meal_name=title_node.text, items=tuple(items))) days.append(Day(date=date(y, m, d), date_raw=date_text, menus=menus)) cached_update_times[location] = datetime.now() diff --git a/docker-compose.override.yml b/docker-compose.override.yml new file mode 100644 index 0000000..3dd6582 --- /dev/null +++ b/docker-compose.override.yml @@ -0,0 +1,7 @@ +version: "3.9" +services: + app: + volumes: + - ".:/usr/src/ru-bot-telegram" + command: ["bash", "-c", "ls *.py | entr -r python3 main.py"] + restart: unless-stopped diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..3de3267 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,4 @@ +version: "3.9" +services: + app: + build: . diff --git a/model.py b/model.py index 81a3efd..583d629 100644 --- a/model.py +++ b/model.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from datetime import datetime, time from enum import Enum -from typing import Set +from typing import FrozenSet class WeekDay(Enum): @@ -52,7 +52,7 @@ class MenuItemIndicator: @dataclass(frozen=True) class MenuItem: name: str - indicators: Set[MenuItemIndicator] + indicators: FrozenSet[MenuItemIndicator] @dataclass(frozen=True) diff --git a/requirements.txt b/requirements.txt index 2e8f5e9..a1ff01c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,4 @@ tornado==6.1 tzdata==2021.5 tzlocal==4.1 urllib3==1.26.8 -lxml==4.8.0 +lxml==4.9.0 -- GitLab