Skip to content
Snippets Groups Projects
Commit f85e6af7 authored by fmk17
Browse files

Update crawler

parent e86b6586
No related branches found
No related tags found
No related merge requests found
......@@ -7,7 +7,7 @@ repos:
- id: trailing-whitespace
# Black formats the Python code
- repo: https://github.com/psf/black
rev: 22.1.0
rev: 22.3.0
hooks:
- id: black
# Flake8 lints the Python code
......
import logging
import re
from collections import namedtuple
from datetime import date, datetime, timedelta
from itertools import dropwhile
from typing import Dict, List, Set
import requests
......@@ -15,6 +17,9 @@ LocationDays = namedtuple("LocationDays", "days, location, update_datetime")
# Module-level dict mapping each Location to a datetime.
# NOTE(review): presumably the time that location's data was last refreshed;
# confirm against the code that writes to it.
cached_update_times: Dict[Location, datetime] = dict()
# Module-level dict mapping each Location to a LocationDays tuple.
# NOTE(review): looks like a cache of previously built results; verify.
cached_responses: Dict[Location, LocationDays] = dict()
# Matches a colon and a space followed by three slash-separated numbers:
# 1-2 digits, 1-2 digits, then 4 digits. Where the pattern is used, the three
# captured groups are converted to int and unpacked, in order, as d, m, y.
DATE_REGEX = r": (\d{1,2})\/(\d{1,2})\/(\d{4})"
# Logger for this module, obtained under the fixed name "crawler".
logger = logging.getLogger("crawler")
def get_location_days(
location: Location, menu_item_indicators: List[MenuItemIndicator]
......@@ -35,12 +40,15 @@ def get_location_days(
post = soup.select_one("#post div:nth-child(3)")
post_children = iter(post.children)
post_children = (node for node in post_children if node.text.strip() != "")
post_children = dropwhile(
lambda n: not re.search(DATE_REGEX, n.text), post_children
)
days = []
for date_node, table_node in zip(post_children, post_children):
date_text = date_node.text
date_re = re.search(r"(\d{1,2})\/(\d{1,2})\/(\d{4})", date_text)
date_re = re.search(DATE_REGEX, date_text)
if date_re is None:
break
d, m, y = map(int, date_re.groups())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment