Up until this point, we've worked within the Jupyter Notebook, but now, in order to deploy our app, we'll move on to working in a text editor. The notebook is excellent for exploratory analysis and visualization, but running a background job is best done within a simple .py file. So, let's get started.
We'll begin with our imports. You may need to pip install a few of these if you don't already have them installed:
import sys import sys import numpy as np from bs4 import BeautifulSoup from selenium import webdriver import requests import scipy from PyAstronomy import pyasl from datetime import date, timedelta, datetime import time from time import sleep import schedule
Next, we'll create a function that pulls down the data and runs our algorithm:
def _parse_fare(fare_text):
    """Convert a scraped fare label such as '$1,234' to an int.

    Returns None when the text has no parseable '$' amount, so callers can
    skip that entry instead of crashing.
    """
    try:
        return int(fare_text.replace(',', '').split('$')[1])
    except (IndexError, ValueError):
        return None


def _scrape_fares(browser, start_date, end_date, weeks=26):
    """Collect the fare cards shown for `weeks` consecutive weekly trips.

    Args:
        browser: a Selenium WebDriver used to load each results page.
        start_date: datetime of the first trip's start date.
        end_date: datetime of the first trip's end date.
        weeks: how many weekly trips to check; both dates advance by 7 days
            per iteration.

    Returns:
        dict mapping each start date ('YYYY-MM-DD') to a dict of
        {city name: fare text} scraped from that page.
    """
    fare_dict = {}
    for _ in range(weeks):
        sat_start = start_date.strftime('%Y-%m-%d')
        sat_end = end_date.strftime('%Y-%m-%d')
        sats = ("https://www.google.com/flights/?f=0#f=0&flt=/m/02_286.r/m/02j9z."
                + sat_start + "*r/m/02j9z./m/02_286."
                + sat_end + ";c:USD;e:1;s:0*1;sd:1;t:e")
        # Random 10-14 second pause between page loads.
        # NOTE(review): presumably to avoid hammering the site - confirm.
        sleep(np.random.randint(10, 15))
        browser.get(sats)
        soup = BeautifulSoup(browser.page_source, "html5lib")
        fares = {}
        for card in soup.select('div[class*=info-container]'):
            city = card.select('h3')[0].text
            fares[city] = card.select('span[class*=price]')[0].text
        fare_dict[sat_start] = fares
        start_date = start_date + timedelta(days=7)
        end_date = end_date + timedelta(days=7)
    return fare_dict


def check_flights():
    """Scrape 26 weeks of fares and alert on unusually cheap ones for a city.

    Reads three command-line arguments: sys.argv[1] is the city name (must
    match the card title on the scraped page), sys.argv[2] and sys.argv[3]
    are the first trip's start and end dates in YYYY-MM-DD format.

    The fares for the city are run through PyAstronomy's generalized ESD
    outlier test; every outlier that is below the mean fare triggers a POST
    to the IFTTT webhook and a console message. Returns None.

    Raises:
        IndexError: if fewer than three command-line arguments were given.
        ValueError: if a date argument is not in YYYY-MM-DD format.
    """
    # Parse the arguments first so bad input fails before a browser starts.
    city_key = sys.argv[1]
    start_sat_date = datetime.strptime(sys.argv[2], '%Y-%m-%d')
    end_sat_date = datetime.strptime(sys.argv[3], '%Y-%m-%d')

    # replace this with the path of where you downloaded chromedriver
    chromedriver_path = "/Users/alexcombs/Downloads/chromedriver"
    # NOTE(review): newer Selenium releases may expect the driver path via a
    # Service object rather than positionally - confirm for your version.
    browser = webdriver.Chrome(chromedriver_path)
    try:
        fare_dict = _scrape_fares(browser, start_sat_date, end_sat_date)
    finally:
        # This function runs on a schedule; without quit() every run would
        # leave another Chrome process behind.
        browser.quit()

    # Keep only the weeks where the requested city appeared with a usable
    # price, instead of crashing the whole job on one missing card.
    city_dict = {}
    for day, fares in fare_dict.items():
        price = _parse_fare(fares.get(city_key, ''))
        if price is None:
            print('No fare found for', city_key, 'on', day)
            continue
        city_dict[day] = price

    if not city_dict:
        print('No fares found for', city_key)
        return

    dates = list(city_dict)
    prices = list(city_dict.values())

    # NOTE(review): the outlier test may need a minimum number of samples
    # relative to the 3 outliers requested - confirm.
    r = pyasl.generalizedESD(prices, 3, 0.025, fullOutput=True)
    print('Total Outliers:', r[0])

    # r[1] holds the indices of the detected outliers within `prices`.
    out_dates = {dates[i]: prices[i] for i in sorted(r[1])}

    city_mean = np.mean(prices)
    for k, v in out_dates.items():
        if v < city_mean:
            # NOTE: the webhook key is embedded in this URL; prefer loading
            # it from configuration rather than committing it to source.
            requests.post(
                'https://maker.ifttt.com/trigger/cheap_fares/with/key/bNHFwiZx0wMS7EnD425n3T',
                data={"value1": str(city_key), "value2": str(v), "value3": ""},
                timeout=30,  # don't let a stalled request block the scheduler
            )
            print('Alert for', city_key + '!')
            print('Fare: $' + str(v), 'on', k)
            print(' ')
        else:
            print(str(v) + ' is greater than ' + str(city_mean))
Finally, we'll include a scheduler. This will run our code every 60 minutes:
# Register check_flights as a job that repeats every 60 minutes.
schedule.every(60).minutes.do(check_flights)

# Poll the scheduler forever, once per second, so the job runs whenever it
# comes due.
while True:
    schedule.run_pending()
    sleep(1)
And that should do it. We can now save this as fare_alerter.py and run it from the command line. You will need to pass in three arguments: the first is the city (spelled exactly as it appears on the fare card), the second is the start date, and the third is the end date. Both dates must be in YYYY-MM-DD format. An example is as follows:
python fare_alerter.py 'Milan' '2018-12-01' '2018-12-08'
The script will continue running and check fares every 60 minutes. If a mistake fare occurs, we'll be one of the first to know!