Skip to content
Snippets Groups Projects
Commit 5790d86e authored by fmk17's avatar fmk17
Browse files

Refactor code

parent fdefd2a3
No related branches found
No related tags found
No related merge requests found
repos:
# Fixes the spaces
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
# Black formats the Python code
- repo: https://github.com/psf/black
rev: 22.1.0
hooks:
- id: black
# Flake8 lints the Python code
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
hooks:
- id: flake8
# isort sorts the imports
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
name: isort (python)
# mypy checks types
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v0.931'
hooks:
- id: mypy
additional_dependencies: [types-requests==2.27.9]
import re
from collections import namedtuple from collections import namedtuple
from datetime import date, datetime, timedelta from datetime import date, datetime, timedelta
from bs4 import BeautifulSoup from typing import Dict, Set
from enum import Enum
import requests import requests
import re from bs4 import BeautifulSoup
import itertools
Day = namedtuple('Day', 'date, date_raw, menus') from model import Location, MenuItem, MenuItemIndicator
Menu = namedtuple('Menu', 'meal, items')
class Location(Enum): Menu = namedtuple("Menu", "meal_name, items")
CENTRAL = 'https://pra.ufpr.br/ru/ru-central/' Day = namedtuple("Day", "date, date_raw, menus")
POLITECNICO = 'https://pra.ufpr.br/ru/ru-centro-politecnico/' LocationDays = namedtuple("LocationDays", "days, location, update_datetime")
BOTANICO = 'https://pra.ufpr.br/ru/cardapio-ru-jardim-botanico/'
AGRARIAS = 'https://pra.ufpr.br/ru/cardapio-ru-agrarias/'
class Meal(Enum): cached_update_times: Dict[Location, datetime] = dict()
BREAKFAST = "CAFÉ DA MANHÃ" cached_responses: Dict[Location, LocationDays] = dict()
LUNCH = "ALMOÇO"
DINNER = "JANTAR"
cached_update_times = dict()
cached_responses = dict()
def get_menus_by_days(location: Location): def get_location_days(location: Location) -> LocationDays:
global cached_responses global cached_responses
global cached_update_times global cached_update_times
if location in cached_update_times and cached_update_times[location] + timedelta(minutes=5) > datetime.now(): if (
return (cached_responses[location], cached_update_times[location]) location in cached_update_times
and cached_update_times[location] + timedelta(minutes=5)
> datetime.now()
):
return cached_responses[location]
response = requests.get(location.value) response = requests.get(location.url)
soup = BeautifulSoup(response.text, 'lxml') soup = BeautifulSoup(response.text, "lxml")
post = soup.select_one('#post div:nth-child(3)') post = soup.select_one("#post div:nth-child(3)")
post_children = iter(post.children) post_children = iter(post.children)
next(post_children) post_children = (node for node in post_children if node.text.strip() != "")
days = [] days = []
for date_node, _, table, _ in zip(post_children, post_children, post_children, post_children): for date_node, table_node in zip(post_children, post_children):
date_text = date_node.text date_text = date_node.text
date_re = re.search(r'(\d{1,2})\/(\d{1,2})\/(\d{4})', date_text) date_re = re.search(r"(\d{1,2})\/(\d{1,2})\/(\d{4})", date_text)
if date_re is None: break if date_re is None:
break
d, m, y = map(int, date_re.groups()) d, m, y = map(int, date_re.groups())
table_children = iter(table.select('td')) table_children = iter(table_node.select("td"))
menus = [] menus = []
for title_node, items in zip(table_children, table_children): for title_node, item_nodes in zip(table_children, table_children):
menus.append(Menu(meal=Meal(title_node.text), items=list(items.stripped_strings))) items = []
days.append( item_name = None
Day(date=date(y, m, d), date_raw=date_text, menus=menus) item_indicators: Set[MenuItemIndicator] = set()
for child in item_nodes:
if child.name == "br" and item_name is not None:
items.append(
MenuItem(name=item_name, indicators=item_indicators)
)
item_name = None
item_indicators = set()
elif child.name == "a":
if not child.select_one("img"):
continue
href = child["href"]
if "Vegano" in href:
item_indicators.add(MenuItemIndicator.VEGAN)
elif "Gluten" in href:
item_indicators.add(MenuItemIndicator.GLUTEN)
elif "Leite" in href:
item_indicators.add(MenuItemIndicator.LACTOSE)
elif "animal" in href:
item_indicators.add(MenuItemIndicator.ANIMAL)
elif "Ovo" in href:
item_indicators.add(MenuItemIndicator.EGG)
elif "Mel" in href:
item_indicators.add(MenuItemIndicator.HONEY)
elif "Alergenicos" in href:
item_indicators.add(MenuItemIndicator.ALERGIC)
elif child.text.strip() != "":
item_name = child.text.strip()
if item_name is not None:
items.append(
MenuItem(name=item_name, indicators=item_indicators)
) )
menus.append(Menu(meal_name=title_node.text, items=items))
days.append(Day(date=date(y, m, d), date_raw=date_text, menus=menus))
cached_responses[location] = days
cached_update_times[location] = datetime.now() cached_update_times[location] = datetime.now()
return (days, cached_update_times[location]) location_days = LocationDays(
days=days,
location=location,
update_datetime=cached_update_times[location],
)
cached_responses[location] = location_days
return location_days
from crawler import Location, Meal
from collections import namedtuple
from pprint import pformat
import sqlite3
import logging import logging
import sqlite3
from datetime import datetime, time
from pprint import pformat
from textwrap import dedent
from zoneinfo import ZoneInfo
from model import Location, Meal, Schedule, WeekDay
from seed import get_seed_locations, get_seed_meals
CURITIBA_TZ = ZoneInfo("America/Sao_Paulo")
def parse_curitiba_time(formatted_time):
    """Build a ``datetime.time`` tagged with ``CURITIBA_TZ`` from "HH:MM" text.

    ``formatted_time`` must consist of exactly two integer fields separated
    by a colon; anything else raises ``ValueError``.
    """
    hour_text, minute_text = formatted_time.split(":")
    return time(int(hour_text), int(minute_text), tzinfo=CURITIBA_TZ)
logger = logging.getLogger("database")
Schedule = namedtuple('Schedule', 'time, day_week, location, meal, user_id, created_at') logger = logging.getLogger("database")
connection = sqlite3.connect("db", isolation_level=None, check_same_thread=False) connection = sqlite3.connect(
"db", isolation_level=None, check_same_thread=False
)
# Create tables # Create tables
with connection: with connection:
connection.execute(''' connection.execute(
dedent(
"""\
CREATE TABLE IF NOT EXISTS location (
name TEXT NOT NULL PRIMARY KEY,
command TEXT NOT NULL,
ordering INT NOT NULL,
url TEXT NOT NULL
)
"""
)
)
connection.execute(
dedent(
"""\
CREATE TABLE IF NOT EXISTS meal (
location_name TEXT NOT NULL
REFERENCES location (name)
DEFERRABLE INITIALLY DEFERRED,
name TEXT NOT NULL,
week_day INT NOT NULL,
ordering INT NOT NULL,
start_time TEXT NOT NULL,
end_time TEXT NOT NULL,
PRIMARY KEY (location_name, name, week_day)
)
"""
)
)
connection.execute(
dedent(
"""\
CREATE TABLE IF NOT EXISTS schedule ( CREATE TABLE IF NOT EXISTS schedule (
time TEXT NOT NULL, time TEXT NOT NULL,
day_week INT NOT NULL, location_name TEXT NOT NULL REFERENCES location (name),
location TEXT NOT NULL, meal_name TEXT NOT NULL,
meal TEXT NOT NULL, week_day INT NOT NULL,
user_id INT NOT NULL, user_id INT NOT NULL,
created_at DATETIME NOT NULL created_at DATETIME NOT NULL,
PRIMARY KEY (time, location_name, meal_name, week_day, user_id),
FOREIGN KEY (location_name, meal_name, week_day)
REFERENCES meal(location_name, name, week_day)
DEFERRABLE INITIALLY DEFERRED
)
"""
) )
''') )
cur = connection.cursor()
cur.execute("BEGIN")
cur.execute("DELETE FROM location")
seed_locations = get_seed_locations()
for location in seed_locations:
cur.execute(
dedent(
"""\
INSERT INTO location (name, command, ordering, url)
VALUES (?, ?, ?, ?)
"""
),
location,
)
cur.execute("DELETE FROM meal")
seed_meals = get_seed_meals()
for meal in seed_meals:
cur.execute(
dedent(
"""\
INSERT INTO meal
(location_name, name, week_day, ordering, start_time, end_time)
VALUES
(?, ?, ?, ?, ?, ?)
"""
),
meal,
)
cur.execute("COMMIT")
def get_schedules_for_user(user_id):
cur = connection.execute(''' def get_schedules_query(end_query, end_params):
SELECT time, day_week, location, meal, user_id, created_at cur = connection.execute(
dedent(
f"""\
SELECT
schedule.time,
schedule.week_day,
schedule.location_name,
location.command as location_command,
location.ordering as location_ordering,
location.url AS location_url,
schedule.meal_name,
meal.ordering as meal_ordering,
meal.start_time as meal_start_time,
meal.end_time as meal_end_time,
schedule.user_id,
schedule.created_at
FROM schedule FROM schedule
WHERE user_id = ? INNER JOIN location
''', (user_id,)) ON location.name = schedule.location_name
INNER JOIN meal
ON meal.location_name = schedule.location_name
AND meal.name = schedule.meal_name
AND meal.week_day = schedule.week_day
{end_query}
"""
),
end_params,
)
rows = cur.fetchall() rows = cur.fetchall()
return [Schedule( return [
row[0], Schedule(
row[1], time=time,
Location[row[2]], week_day=WeekDay(week_day),
Meal[row[3]], meal=Meal(
row[4], location=Location(
row[5] name=location_name,
) for row in rows] command=location_command,
ordering=location_ordering,
url=location_url,
),
name=meal_name,
week_day=WeekDay(week_day),
ordering=meal_ordering,
start_time=meal_start_time,
end_time=meal_end_time,
),
user_id=user_id,
created_at=created_at,
)
for (
time,
week_day,
location_name,
location_command,
location_ordering,
location_url,
meal_name,
meal_ordering,
meal_start_time,
meal_end_time,
user_id,
created_at,
) in rows
]
def get_schedules_for_user(user_id):
    """Return the schedules stored for ``user_id``.

    The rows come from ``get_schedules_query`` and are ordered by location
    ordering, then meal ordering, then week day.
    """
    # The user id is bound as a query parameter, never spliced into the SQL.
    end_query = dedent(
        """\
        WHERE user_id = ?
        ORDER BY location.ordering, meal.ordering, meal.week_day
        """
    )
    end_params = (user_id,)
    return get_schedules_query(end_query, end_params)
def get_schedules_matching_datetime(dt: datetime):
    """Return the schedules whose time and week day both equal those of ``dt``.

    ``dt`` is reduced to its "HH:MM" text and its ``weekday()`` number
    (Monday is 0); both are compared for equality against the stored rows.
    Results are ordered by user id, then location ordering, then meal
    ordering.
    """
    formatted_time = dt.strftime("%H:%M")
    # weekday() only yields 0-6, so rows stored with week_day 7 can never
    # match here.
    week_day_number = dt.weekday()
    logging.info(
        "Getting schedules matching time "
        f"{formatted_time} AND week_day {week_day_number}"
    )
    end_query = dedent(
        """\
        WHERE schedule.time = ? AND schedule.week_day = ?
        ORDER BY schedule.user_id, location.ordering, meal.ordering
        """
    )
    return get_schedules_query(end_query, (formatted_time, week_day_number))
def upsert_schedule(schedule): def upsert_schedule(schedule):
cur = connection.execute(''' cur = connection.execute(
dedent(
"""\
SELECT created_at SELECT created_at
FROM schedule FROM schedule
WHERE time = ? and day_week = ? and location = ? and meal = ? and user_id = ? WHERE week_day = ? AND location_name = ?
''', ( AND meal_name = ? AND user_id = ?
schedule.time, """
schedule.day_week, ),
schedule.location.name, (
schedule.week_day.value,
schedule.meal.location.name,
schedule.meal.name, schedule.meal.name,
schedule.user_id schedule.user_id,
)) ),
)
row = cur.fetchone() row = cur.fetchone()
if not row: if not row:
logging.info(f"Inserting {pformat(schedule)}") logging.info(f"Inserting {pformat(schedule)}")
connection.execute(''' connection.execute(
dedent(
"""\
INSERT INTO schedule INSERT INTO schedule
(time, day_week, location, meal, user_id, created_at) (time, week_day, location_name, meal_name, user_id, created_at)
VALUES VALUES
(?, ?, ?, ?, ?, ?) (?, ?, ?, ?, ?, ?)
''', ( """
schedule.time, ),
schedule.day_week, (
schedule.location.name, schedule.time.strftime("%H:%M"),
schedule.week_day.value,
schedule.meal.location.name,
schedule.meal.name, schedule.meal.name,
schedule.user_id, schedule.user_id,
schedule.created_at schedule.created_at,
)) ),
)
else: else:
logging.info(f"Already inserted {pformat(schedule)}") logging.info(f"Already inserted {pformat(schedule)}")
def get_schedules_matching_time(datetime):
time = datetime.strftime('%H:%M')
day_week = datetime.weekday()
logging.info(f"Getting schedules matching time {time} and day_week {day_week}")
cur = connection.execute('''
SELECT time, day_week, location, meal, user_id, created_at
FROM schedule
WHERE time = ? and day_week = ?
''', (time, day_week))
rows = cur.fetchall()
return [Schedule(
row[0],
row[1],
Location[row[2]],
Meal[row[3]],
row[4],
row[5]
) for row in rows]
def delete_all_schedules_from_user(user_id): def delete_all_schedules_from_user(user_id):
connection.execute(''' connection.execute(
dedent(
"""
DELETE FROM schedule DELETE FROM schedule
WHERE user_id = ? WHERE user_id = ?
''', (user_id,)) """
),
(user_id,),
)
def get_locations():
    """Return every row of the ``location`` table as a ``Location``."""
    query = dedent(
        """\
        SELECT name, command, ordering, url FROM location
        """
    )
    cursor = connection.execute(query)
    # The selected columns are in the same order as the Location fields.
    return [
        Location(name, command, ordering, url)
        for name, command, ordering, url in cursor.fetchall()
    ]
def get_location_by_command(command):
    """Return the ``Location`` whose command equals ``command``.

    Returns ``None`` when no row matches.
    """
    query = dedent(
        """\
        SELECT name, command, ordering, url FROM location
        WHERE command = ?
        """
    )
    found = connection.execute(query, (command,)).fetchone()
    if found is None:
        return None
    return Location(*found)
def get_meals_query(end_query, end_params=()):
    """Run the shared meal/location SELECT and return ``Meal`` objects.

    ``end_query`` is appended verbatim to the SELECT (for WHERE / ORDER BY
    clauses), so it must be fixed, trusted SQL text; values belong in
    ``end_params``, which are bound as query parameters.

    Each returned ``Meal`` carries its joined ``Location``, a ``WeekDay``
    built from the stored number, and start/end times converted with
    ``parse_curitiba_time``.
    """
    cursor = connection.execute(
        dedent(
            f"""\
            SELECT
                meal.location_name,
                location.command as location_command,
                location.ordering as location_ordering,
                location.url as location_url,
                meal.name,
                meal.week_day,
                meal.ordering,
                meal.start_time,
                meal.end_time
            FROM meal
            INNER JOIN location ON location.name = meal.location_name
            {end_query}
            """
        ),
        end_params,
    )

    meals = []
    # Unpack each row in the same order as the SELECT column list above.
    for (
        location_name,
        location_command,
        location_ordering,
        location_url,
        name,
        week_day,
        ordering,
        start_time,
        end_time,
    ) in cursor.fetchall():
        meal_location = Location(
            name=location_name,
            command=location_command,
            ordering=location_ordering,
            url=location_url,
        )
        meals.append(
            Meal(
                location=meal_location,
                name=name,
                week_day=WeekDay(week_day),
                ordering=ordering,
                start_time=parse_curitiba_time(start_time),
                end_time=parse_curitiba_time(end_time),
            )
        )
    return meals
def get_meals():
    """Return all meals ordered by location name, meal ordering, week day."""
    ordering_clause = dedent(
        """\
        ORDER BY location.name, meal.ordering, meal.week_day
        """
    )
    return get_meals_query(ordering_clause)
def get_meals_by_location_name(location_name):
    """Return the meals of one location, ordered by meal ordering, week day.

    ``location_name`` is bound as a query parameter and compared against
    ``meal.location_name``.
    """
    filter_clause = dedent(
        """\
        WHERE meal.location_name = ?
        ORDER BY meal.ordering, meal.week_day
        """
    )
    bound_values = (location_name,)
    return get_meals_query(filter_clause, bound_values)
This diff is collapsed.
model.py 0 → 100644
from dataclasses import dataclass
from datetime import datetime, time
from enum import Enum
from typing import Set
class WeekDay(Enum):
    """Day of the week, plus a holiday slot, with Portuguese labels.

    Every member is declared as ``(value, short, long)``. Only ``value``
    becomes the enum value (0 for Monday through 6 for Sunday, 7 for
    holidays); ``short`` and ``long`` are stored as attributes on the member.
    """

    MONDAY = (0, "SEG", "Segunda-feira")
    TUESDAY = (1, "TER", "Terça-feira")
    WEDNESDAY = (2, "QUA", "Quarta-feira")
    THURSDAY = (3, "QUI", "Quinta-feira")
    FRIDAY = (4, "SEX", "Sexta-feira")
    SATURDAY = (5, "SAB", "Sábado")
    SUNDAY = (6, "DOM", "Domingo")
    HOLIDAYS = (7, "FER", "Feriado")

    def __new__(cls, value, short, long):
        # Registering just the integer as ``_value_`` lets ``WeekDay(0)``
        # resolve to the matching member.
        member = object.__new__(cls)
        member._value_ = value
        member.short = short
        member.long = long
        return member

    def __lt__(self, other):
        # Members order by their integer value; any other operand type is
        # handed back to Python via NotImplemented.
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value < other.value
# Monday through Friday, in declaration order.
WorkingWeekDay = [
    week_day
    for week_day in WeekDay
    if WeekDay.MONDAY.value <= week_day.value <= WeekDay.FRIDAY.value
]
class MenuItemIndicator(Enum):
    """Dietary marker that can be attached to a menu item.

    Every member is declared as ``(emoji, description)``. The pair is kept
    as the enum value and each part is also exposed as an attribute.
    """

    VEGAN = ("🌱", "Indicado para veganos")
    GLUTEN = ("🌾", "Não indicado para celíacos por conter glúten")
    LACTOSE = (
        "🥛",
        "Não indicado para intolerantes à lactose por conter lactose",
    )
    ANIMAL = ("🥩", "Contém produtos de origem animal")
    EGG = ("🥚", "Contém ovo")
    HONEY = ("🍯", "Contém mel")
    ALERGIC = ("⚠️", "Contém produto(s) alergênico(s)")

    def __new__(cls, emoji, description):
        # Keep the full pair as the value and mirror both parts as attributes.
        indicator = object.__new__(cls)
        indicator.emoji = emoji
        indicator.description = description
        indicator._value_ = (emoji, description)
        return indicator
@dataclass(frozen=True)
class MenuItem:
    """One entry of a menu together with its dietary indicators."""

    # Name of the item.
    name: str
    # Indicators attached to this item; empty when there are none.
    indicators: Set[MenuItemIndicator]
@dataclass(frozen=True)
class Location:
    """A location record, mirroring the columns of the ``location`` table."""

    # Name of the location; the primary key of the ``location`` table.
    name: str
    # Short identifier used to look a location up
    # (presumably the bot command for it — TODO confirm).
    command: str
    # Sort position used when listing locations.
    ordering: int
    # Address of the page holding this location's menu.
    url: str
@dataclass(frozen=True)
class LocationWeekDays:
    """Pairs a location with a single week day."""

    # The location being referred to.
    location: Location
    # The week day associated with that location.
    week_day: WeekDay
@dataclass(frozen=True)
class Meal:
    """A meal offered at a location on a given week day."""

    # Location offering the meal.
    location: Location
    # Name of the meal.
    name: str
    # Week day this entry applies to.
    week_day: WeekDay
    # Sort position used when listing meals.
    ordering: int
    # Start and end of the meal.
    # NOTE(review): get_meals_query stores parsed time objects here, while
    # get_schedules_query passes the raw database text — confirm which one
    # is intended.
    start_time: time
    end_time: time
@dataclass(frozen=True)
class Schedule:
    """A scheduled entry for one user, tied to a meal and a week day."""

    # "HH:MM" text, e.g. 18:00.
    # NOTE(review): upsert_schedule calls ``.strftime`` on this field, which
    # a plain str does not support — confirm the intended type.
    time: str
    # Week day the schedule applies to.
    week_day: WeekDay
    # Meal (and, through it, the location) the schedule refers to.
    meal: Meal
    user_id: int  # Telegram user id
    # When the schedule was created.
    created_at: datetime
[tool.black]
line-length = 79
[tool.isort]
profile = "black"
seed.py 0 → 100644
# This file holds the initial (seed) configuration.
# Opening hours are subject to change.
# Last updated on February 10, 2022.
from model import WeekDay, WorkingWeekDay
def get_seed_locations():
    """Return the initial locations as a list of tuples.

    Each tuple is ``(name, command, ordering, url)``; ``ordering`` follows
    the position in the table below, starting at 1.
    """
    names_commands_urls = (
        (
            "Centro Politécnico",
            "poli",
            "https://pra.ufpr.br/ru/ru-centro-politecnico/",
        ),
        ("Central", "central", "https://pra.ufpr.br/ru/ru-central/"),
        (
            "Jardim Botânico",
            "botanico",
            "https://pra.ufpr.br/ru/cardapio-ru-jardim-botanico/",
        ),
        (
            "Agrárias",
            "agrarias",
            "https://pra.ufpr.br/ru/cardapio-ru-agrarias/",
        ),
    )
    return [
        (name, command, ordering, url)
        for ordering, (name, command, url) in enumerate(
            names_commands_urls, start=1
        )
    ]
def get_seed_meals():
    """Return the initial meals as a list of tuples.

    Each tuple is
    ``(location_name, name, week_day, ordering, start_time, end_time)``,
    where ``week_day`` is the integer value of a ``WeekDay`` member and the
    times are "HH:MM" text. One tuple is produced per week day that a
    timetable entry applies to, in the order the entries are listed.
    """
    weekdays = WorkingWeekDay
    saturday = (WeekDay.SATURDAY,)
    sun_hol = (WeekDay.SUNDAY, WeekDay.HOLIDAYS)

    # (location name, meal name, ordering, start, end, week days it covers)
    timetable = (
        ("Central", "CAFÉ DA MANHÃ", 1, "06:45", "08:00", weekdays),
        ("Central", "ALMOÇO", 2, "11:00", "13:30", weekdays),
        ("Central", "JANTAR", 3, "17:30", "19:30", weekdays),
        ("Central", "CAFÉ DA MANHÃ", 1, "08:15", "09:15", saturday),
        ("Central", "ALMOÇO", 2, "11:30", "13:30", saturday),
        ("Central", "JANTAR", 3, "17:45", "19:30", saturday),
        ("Central", "CAFÉ DA MANHÃ", 1, "08:30", "09:30", sun_hol),
        ("Central", "ALMOÇO", 2, "11:30", "13:30", sun_hol),
        ("Central", "JANTAR", 3, "17:45", "19:00", sun_hol),
        ("Centro Politécnico", "CAFÉ DA MANHÃ", 1, "06:45", "08:00", weekdays),
        ("Centro Politécnico", "ALMOÇO", 2, "11:00", "13:30", weekdays),
        ("Centro Politécnico", "JANTAR", 3, "17:30", "19:30", weekdays),
        ("Agrárias", "CAFÉ DA MANHÃ", 1, "06:45", "08:00", weekdays),
        ("Agrárias", "ALMOÇO", 2, "11:00", "13:30", weekdays),
        ("Jardim Botânico", "CAFÉ DA MANHÃ", 1, "06:45", "08:00", weekdays),
        ("Jardim Botânico", "ALMOÇO", 2, "11:00", "13:30", weekdays),
        ("Jardim Botânico", "JANTAR", 3, "17:30", "19:30", weekdays),
    )

    seed_rows = []
    for location_name, meal_name, ordering, start, end, days in timetable:
        for week_day in days:
            seed_rows.append(
                (
                    location_name,
                    meal_name,
                    week_day.value,
                    ordering,
                    start,
                    end,
                )
            )
    return seed_rows
[flake8]
ignore = E203, W503
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment