Skip to content
Snippets Groups Projects
Commit d0d4b43f authored by fmk17's avatar fmk17
Browse files

Create initial crawler and bot

parents
No related branches found
No related tags found
No related merge requests found
.env
__pycache__
*.log
from collections import namedtuple
from datetime import date, datetime, timedelta
from bs4 import BeautifulSoup
from enum import Enum
import requests
import re
import itertools
# Lightweight immutable records handed back by the crawler.
# Day: the parsed calendar date, the date text exactly as scraped, and that day's meals.
Day = namedtuple('Day', ['date', 'date_raw', 'meals'])
# Meal: the meal's title and the list of item strings served in it.
Meal = namedtuple('Meal', ['name', 'items'])
class Location(Enum):
    """Restaurants whose menu can be scraped; each member's value is its menu page URL."""

    CENTRAL = 'https://pra.ufpr.br/ru/ru-central/'
    POLITECNICO = 'https://pra.ufpr.br/ru/ru-centro-politecnico/'
    BOTANICO = 'https://pra.ufpr.br/ru/cardapio-ru-jardim-botanico/'
    AGRARIAS = 'https://pra.ufpr.br/ru/cardapio-ru-agrarias/'
# How long a scraped menu is served from memory before the page is fetched again.
CACHE_TTL = timedelta(minutes=5)

# Seconds to wait for the menu page before the request is abandoned.
REQUEST_TIMEOUT = 10

# Finds a d/m/yyyy date inside the text of a day heading.
_DATE_PATTERN = re.compile(r'(\d{1,2})\/(\d{1,2})\/(\d{4})')

# Per-location cache: when each location was last scraped, and what was parsed then.
cached_update_times = dict()
cached_responses = dict()


def get_meals_by_days(location: "Location"):
    """Return the menu for ``location``, scraping its page unless a fresh copy is cached.

    A cached result is reused only while it is younger than ``CACHE_TTL``;
    after that the page at ``location.value`` is downloaded and parsed again.

    Args:
        location: The restaurant to look up; its ``value`` is the URL fetched.

    Returns:
        A ``(days, update_time)`` tuple: ``days`` is a list of ``Day`` records
        (empty when the expected markup is not found) and ``update_time`` is
        the ``datetime`` at which that list was scraped.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    last_update = cached_update_times.get(location)
    # Compare the entry's age against the TTL. Comparing the timestamp against
    # "now + TTL" would hold for every past timestamp and never expire anything.
    if last_update is not None and datetime.now() - last_update < CACHE_TTL:
        return (cached_responses[location], last_update)

    response = requests.get(location.value, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error page instead of parsing (and caching) it as a menu.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    post = soup.select_one('#post div:nth-child(3)')

    days = []
    if post is not None:
        post_children = iter(post.children)
        next(post_children, None)  # the first child is skipped before grouping
        # Passing the same iterator four times makes zip consume the children in
        # groups of four: (date node, ignored, table, ignored).
        for date_node, _, table, _ in zip(post_children, post_children, post_children, post_children):
            date_text = date_node.text
            date_match = _DATE_PATTERN.search(date_text)
            if date_match is None:
                # The first group without a date ends the run of day entries.
                break
            d, m, y = map(int, date_match.groups())

            # Table cells alternate: meal title, then the cell holding its items.
            cells = iter(table.select('td'))
            meals = [
                Meal(name=title_node.text, items=list(items.stripped_strings))
                for title_node, items in zip(cells, cells)
            ]
            days.append(Day(date=date(y, m, d), date_raw=date_text, meals=meals))

    cached_responses[location] = days
    cached_update_times[location] = datetime.now()
    return (days, cached_update_times[location])
main.py 0 → 100644
#!/usr/bin/env python3
from dotenv import load_dotenv
from telegram import Update, ReplyKeyboardMarkup
from telegram.ext import Updater, CommandHandler, CallbackContext
from crawler import get_meals_by_days, Location
import logging
import os
# Runs python-dotenv's loader before anything reads the environment.
# NOTE(review): presumably this is how TELEGRAM_BOT_TOKEN (read in main()) gets
# populated from a local .env file during development -- confirm.
load_dotenv()
# Timestamp layout for log records: year-month-day, a literal "T", then the time.
# The day field must be %d; a second %m would print the month twice and drop the day.
LOG_DATE_FORMAT = '%Y-%m-%dT%H:%M:%S'

# Every record goes both to bot.log and to the console stream.
file_handler = logging.FileHandler('bot.log')
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.DEBUG)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s %(name)s (%(module)s:%(funcName)s:%(lineno)d) %(message)s',
    datefmt=LOG_DATE_FORMAT,
    handlers=[file_handler, stream_handler])

# Logger used by the bot's own code.
logger = logging.getLogger("bot")
def start(update: Update, context: CallbackContext) -> None:
    """Answer /start with the bot's introduction and a keyboard of its commands."""
    greeting = '''
Olá, eu sou o RU UFPR Bot, o robô de <a href="https://gitlab.c3sl.ufpr.br/caad/ru-bot-telegram">código aberto</a> mantido pelo <a href="https://caad.inf.ufpr.br/">CAAD (Centro Acadêmico Alexandre Direne)</a> que te mostra o cardápio do Restaurante Universitário da UFPR!
'''
    # One inner list per keyboard row; each string is the label of one button.
    keyboard_rows = [
        ["/agendar · Agendar envio de cardápio"],
        [
            "/cardapio_central · Ver o cardápio do RU Central",
            "/cardapio_poli · Ver o cardápio do RU Centro Politécnico",
        ],
        [
            "/cardapio_botanico · Ver o cardápio do RU Jardim Botânico",
            "/cardapio_agrarias · Ver o cardápio do RU Agrárias",
        ],
    ]
    update.message.reply_html(greeting, reply_markup=ReplyKeyboardMarkup(keyboard_rows))
# Bot command name (without the leading slash) -> restaurant whose menu it shows.
COMMAND_TO_LOCATION = dict(
    cardapio_central=Location.CENTRAL,
    cardapio_poli=Location.POLITECNICO,
    cardapio_botanico=Location.BOTANICO,
    cardapio_agrarias=Location.AGRARIAS,
)

# Restaurant -> title printed at the top of its menu message.
LOCATION_TO_HEADER = dict([
    (Location.CENTRAL, 'RU Central'),
    (Location.POLITECNICO, 'RU Centro Politécnico'),
    (Location.BOTANICO, 'RU Jardim Botânico'),
    (Location.AGRARIAS, 'RU Agrárias'),
])
def _render_menu(days) -> str:
    """Render scraped days as Telegram-HTML text, escaping every page-provided string."""
    # Local import keeps this block self-contained; html is part of the stdlib.
    from html import escape

    return '\n\n'.join(
        f'<b>{escape(day.date_raw, quote=False)}</b>\n' + '\n'.join(
            f' <b>{escape(meal.name, quote=False)}</b>\n'
            + '\n'.join(f' {escape(item, quote=False)}' for item in meal.items)
            for meal in day.meals
        )
        for day in days
    )


def cardapio(update: Update, context: CallbackContext) -> None:
    """Reply with the menu of the restaurant selected by the received command.

    The command is read from the message's first ``bot_command`` entity and
    looked up in ``COMMAND_TO_LOCATION``. The reply carries the restaurant
    header, each day with its meals and items (or an "unavailable" notice when
    no day was parsed), and the time the data was last scraped.

    Args:
        update: The incoming Telegram update carrying the command message.
        context: Handler context supplied by the dispatcher (unused).

    Raises:
        KeyError: If the command is not a key of ``COMMAND_TO_LOCATION``.
    """
    entities = update.message.parse_entities(types=["bot_command"])
    # Drop the leading "/" and any "@BotName" suffix, which Telegram appends to
    # commands in group chats ("/cardapio_central@SomeBot" -> "cardapio_central").
    command = next(iter(entities.values()))[1:].split('@', 1)[0]
    location = COMMAND_TO_LOCATION[command]
    days, update_time = get_meals_by_days(location)

    header = f'<b>{LOCATION_TO_HEADER[location]}</b>'
    # Scraped text is escaped inside _render_menu: a stray "<" or "&" from the
    # page would otherwise make the HTML-formatted reply invalid.
    body = _render_menu(days) if days else '<b>Cardápio indisponível</b>'
    updated_on = f'<i>Atualizado às {update_time:%H:%M:%S} de hoje</i>'
    update.message.reply_html(header + '\n\n' + body + '\n\n' + updated_on)

    user = update.effective_user
    logger.info(
        "User %s %s %s %s used /%s",
        user.id, user.first_name, user.last_name, user.username, command,
    )
def main() -> None:
    """Create the updater, register every command handler, and poll until stopped."""
    logger.info("Starting bot...")
    updater = Updater(os.environ['TELEGRAM_BOT_TOKEN'])
    dispatcher = updater.dispatcher

    # All menu commands share the same callback; /start is registered after them.
    for menu_command in COMMAND_TO_LOCATION:
        dispatcher.add_handler(CommandHandler(menu_command, cardapio))
    dispatcher.add_handler(CommandHandler('start', start))

    updater.start_polling()
    logger.info("Connected")
    # Returns only once the updater has been asked to stop.
    updater.idle()
    logger.info("Done")


if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment