From 6be34d53055a6a65585fabb0c8e5dc23951d97d7 Mon Sep 17 00:00:00 2001 From: Giulio De Pasquale Date: Mon, 21 Jun 2021 02:35:27 +0100 Subject: [PATCH] check for links in embeds, updated partalert link checker --- app.py | 62 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/app.py b/app.py index 1e74520..0ad51cd 100644 --- a/app.py +++ b/app.py @@ -10,7 +10,8 @@ from os import getenv PRICE_REGEX = re.compile( "(?:(?P[GBP|EUR|£|€])(?P[0-9]+(?:\.[0-9]{1,2})))") MODEL_REGEX = re.compile("[Rr][Tt][Xx] ?(?P30[6789]0( [Tt][Ii])?).?") -URL_REGEX = re.compile("(?:(?:https?|ftp):\/\/|\b(?:[a-z\d]+\.))(?:(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))?\))+(?:\((?:[^\s()<>]+|(?:\(?:[^\s()<>]+\)))?\)|[^\s`!()\[\]{};:'.,<>?«»“”‘’]))?") +URL_REGEX = re.compile( + "(?:(?:https?|ftp):\/\/|\b(?:[a-z\d]+\.))(?:(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))?\))+(?:\((?:[^\s()<>]+|(?:\(?:[^\s()<>]+\)))?\)|[^\s`!()\[\]{};:'.,<>?«»“”‘’]))?") PARTALERT_ASIN = re.compile("asin=(?P[0-9a-zA-Z]{1,10})") PARTALERT_TLD = re.compile("tld=(?P\.(?:it|es|de|fr|co\.uk))") @@ -39,28 +40,47 @@ bot = discum.Client(token=token, log={"console": False, "file": False}) ######################################## -def get_stockinformer_url(url: str) -> str: +def get_soup(url: str): r = requests.get(url) - b = BeautifulSoup(r.text) + return BeautifulSoup(r.text) - for a in b.find_all("a"): + +def get_stockinformer_url(url: str) -> str: + bs = get_soup(url) + + for a in bs.find_all("a"): if "view at" in a.text.lower(): return f"https://stockinformer.co.uk/{a.get('href')}" return None +def get_partalert_url(url: str) -> str: + ret_url = None + bs = get_soup(url) + + for a in bs.find_all("a"): + if "amazon" in a.text.lower(): + amazon_url = a.get("href") + + try: + asin = PARTALERT_ASIN.search(amazon_url).group('asin') + tld = PARTALERT_TLD.search(amazon_url).group('tld') + + ret_url = f"https://amazon{tld}/dp/{asin}" + except Exception as e: + print(f"Exception: {e}") + + return ret_url + + def check_urls(urls: List[str]): for url in urls: print(f"Received {url}") if "partalert" in url: - try: - asin = PARTALERT_ASIN.search(url).group('asin') - tld = PARTALERT_TLD.search(url).group('tld') + url = get_partalert_url(url) - url = f"https://amazon{tld}/dp/{asin}" - except Exception as e: - print(f"Exception: {e}") + if not url: continue elif "stockinformer" in url: url = get_stockinformer_url(url) @@ -108,25 +128,29 @@ def check_price(message: str) -> bool: @bot.gateway.command def on_message(resp): + urls = [] + if resp.event.ready_supplemental: bot.gateway.subscribeToGuildEvents(wait=1) if resp.event.message: m = resp.parsed.auto() - # because DMs are technically channels too - guildID = m['guild_id'] if 'guild_id' in m else None - channelID = int(m['channel_id']) - username = m['author']['username'] - discriminator = m['author']['discriminator'] - content = m['content'] - if channelID in MONITORED_CHANNELS: - urls = URL_REGEX.findall(content) + channel_id = int(m['channel_id']) + content = m['content'] + embeds = m['embeds'] + + if channel_id in MONITORED_CHANNELS: + # search for urls in message text + urls.append(URL_REGEX.findall(content)) + + # search for urls in embeds + for e in embeds: + urls.append(URL_REGEX.findall(e['value'])) if (urls): check_urls(urls) print("Initialized.") - bot.gateway.run(auto_reconnect=True)