From f311afa04b3825f29b6f28547101c29fdb4863db Mon Sep 17 00:00:00 2001 From: Spencer Lyon Date: Wed, 8 Aug 2012 21:00:53 -0700 Subject: [PATCH 1/5] Added options data functionality --- pandas/io/data.py | 337 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 327 insertions(+), 10 deletions(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index 65c4daec3d..4255191729 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -11,9 +11,12 @@ import time from zipfile import ZipFile +from BeautifulSoup import BeautifulSoup from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str -from pandas import DataFrame, read_csv +from pandas import DataFrame, read_csv, concat +from pandas.io.parsers import TextParser + def DataReader(name, data_source=None, start=None, end=None, retry_count=3, pause=0): @@ -59,6 +62,7 @@ def DataReader(name, data_source=None, start=None, end=None, elif(data_source == "famafrench"): return get_data_famafrench(name=name) + def _sanitize_dates(start, end): from pandas.core.datetools import to_datetime start = to_datetime(start) @@ -69,37 +73,38 @@ def _sanitize_dates(start, end): end = dt.datetime.today() return start, end + def get_quote_yahoo(symbols): """ Get current yahoo quote Returns a DataFrame """ - if not isinstance(symbols,list): + if not isinstance(symbols, list): raise TypeError, "symbols must be a list" # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm codes = {'symbol':'s','last':'l1','change_pct':'p2','PE':'r','time':'t1','short_ratio':'s7'} request = str.join('',codes.values()) # code request string header = codes.keys() - data = dict(zip(codes.keys(),[[] for i in range(len(codes))])) + data = dict(zip(codes.keys(), [[] for i in range(len(codes))])) urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (str.join('+',symbols), request) try: lines = urllib2.urlopen(urlStr).readlines() except Exception, e: - s = "Failed to download:\n{0}".format(e); + s = "Failed to download:\n{0}".format(e) print s return None for line in lines: fields = line.strip().split(',') - for i,field in enumerate(fields): + for i, field in enumerate(fields): if field[-2:] == '%"': data[header[i]].append(float(field.strip('"%'))) elif field[0] == '"': - data[header[i]].append( field.strip('"')) + data[header[i]].append(field.strip('"')) else: try: data[header[i]].append(float(field)) @@ -108,7 +113,8 @@ def get_quote_yahoo(symbols): idx = data.pop('symbol') - return DataFrame(data,index=idx) + return DataFrame(data, index=idx) + def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0): """ @@ -136,7 +142,7 @@ def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0): '&ignore=.csv' for _ in range(retry_count): - resp = urllib2.urlopen(url) + resp = urllib2.urlopen(url) if resp.code == 200: lines = resp.read() rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, @@ -155,7 +161,6 @@ def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0): "return a 200 for url %s" % (pause, url)) - def get_data_fred(name=None, start=dt.datetime(2010, 1, 1), end=dt.datetime.today()): """ @@ -177,6 +182,7 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1), data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True) return data.truncate(start, end) + def get_data_famafrench(name, start=None, end=None): start, end = _sanitize_dates(start, end) @@ -190,7 +196,7 @@ def get_data_famafrench(name, start=None, end=None): file_edges = np.where(np.array([len(d) for d in data]) == 2)[0] datasets = {} - for i in range(len(file_edges)-1): + for i in range(len(file_edges) - 1): dataset = [d.split() for d in data[(file_edges[i] + 1):file_edges[i+1]]] if(len(dataset) > 10): ncol = np.median(np.array([len(d) for d in dataset])) @@ -203,3 +209,314 @@ def get_data_famafrench(name, start=None, end=None): datasets[i] = DataFrame(dataset, index, columns=header) return datasets + +cur_month = dt.datetime.now().month +cur_year = dt.datetime.now().year + + +def _unpack(row, kind='td'): + return [val.text for val in row.findAll(kind)] + + +def _parse_options_data(table): + rows = table.findAll('tr') + header = _unpack(rows[0], kind='th') + data = [_unpack(r) for r in rows[1:]] + return TextParser(data, names=header).get_chunk() + + +class Options(): + """ + This class fetches call/put data for a given stock/exipry month. + + It is instantiated with a string representing the ticker symbol. + + The class has the following methods: + get_options_data:(month, year) + get_call_data:(month, year) + get_put_data: (month, year) + get_near_stock_price(opt_frame, above_below) + get_forward_data(months, call, put) + + Examples + -------- + # Instantiate object with ticker + >>> aapl = Options('aapl') + + # Fetch September 2012 call data + >>> calls = aapl.get_call_data(9, 2012) + + # Fetch September 2012 put data + >>> puts = aapl.get_put_data(9, 2012) + + # cut down the call data to be 3 below and 3 above the stock price. + >>> cut_calls = aapl.get_near_stock_price(calls, above_below=3) + + # Fetch call and put data with expiry from now to 8 months out + >>> forward_calls, forward_puts = aapl.get_forward_data(8, + ... call=True, put=True) + """ + + def __init__(self, symbol): + """ Instantiates options_data with a ticker saved as symbol """ + self.symbol = str(symbol).upper() + + def get_options_data(self, month=cur_month, year=cur_year): + """ + Gets call/put data for the stock with the expiration data in the + given month and year + + Parameters + ---------- + month: number, int + The month of the options expire. + + year: number, int + The year the options expire. + + excel: bool, optional(default=False) + A boolean value indicating whether or not the data should be saved + to an excel spreadsheet. If true the name of the file will be + "'ticker'_options.xlsx" unless otherwise indicated. Also there will + be two sheets created. The first one is named 'calls' and contains + the call data and the second is for the puts. + + Returns + ------- + call_data: pandas.DataFrame + A DataFrame with call options data. + + put_data: pandas.DataFrame + A DataFrame with call options data. + """ + + mon_in = month if len(str(month)) == 2 else str('0' + str(month)) + + url = str('http://finance.yahoo.com/q/op?s=' + self.symbol + '&m=' + + str(year) + '-' + str(mon_in)) + + buf = urllib2.urlopen(url) + soup = BeautifulSoup(buf) + body = soup.body + + tables = body.findAll('table') + calls = tables[9] + puts = tables[13] + + call_data = _parse_options_data(calls) + put_data = _parse_options_data(puts) + + return [call_data, put_data] + + def get_call_data(self, month=cur_month, year=cur_year): + """ + Gets call/put data for the stock with the expiration data in the + given month and year + + Parameters + ---------- + month: number, int + The month of the options expire. + + year: number, int + The year the options expire. + + excel: bool, optional(default=False) + A boolean value indicating whether or not the data should be saved + to an excel spreadsheet. If true the name of the file will be + "'ticker'_options.xlsx" unless otherwise indicated. Also there will + be two sheets created. The first one is named 'calls' and contains + the call data and the second is for the puts. + + Returns + ------- + call_data: pandas.DataFrame + A DataFrame with call options data. + + put_data: pandas.DataFrame + A DataFrame with call options data. + """ + + mon_in = month if len(str(month)) == 2 else str('0' + str(month)) + + url = str('http://finance.yahoo.com/q/op?s=' + self.symbol + '&m=' + + str(year) + '-' + str(mon_in)) + + buf = urllib2.urlopen(url) + soup = BeautifulSoup(buf) + body = soup.body + + tables = body.findAll('table') + calls = tables[9] + + call_data = _parse_options_data(calls) + + return call_data + + def get_put_data(self, month=cur_month, year=cur_year): + """ + Gets put data for the stock with the expiration data in the + given month and year + + Parameters + ---------- + month: number, int + The month of the options expire. + + year: number, int + The year the options expire. + + excel: bool, optional(default=False) + A boolean value indicating whether or not the data should be saved + to an excel spreadsheet. If true the name of the file will be + "'ticker'_options.xlsx" unless otherwise indicated. Also there will + be two sheets created. The first one is named 'calls' and contains + the call data and the second is for the puts. + + Returns + ------- + put_data: pandas.DataFrame + A DataFrame with call options data. + """ + + mon_in = month if len(str(month)) == 2 else str('0' + str(month)) + + url = str('http://finance.yahoo.com/q/op?s=' + self.symbol + '&m=' + + str(year) + '-' + str(mon_in)) + + buf = urllib2.urlopen(url) + soup = BeautifulSoup(buf) + body = soup.body + + tables = body.findAll('table') + puts = tables[13] + + put_data = _parse_options_data(puts) + + return put_data + + def get_near_stock_price(self, opt_df, above_below=2): + """ + Cuts the data frame opt_df that is passed in to only take + options that are near the current stock price. + + Parameters + ---------- + opt_df: DataFrame + The DataFrame that will be passed in to be cut down. + + above_below: number, int, optional (default=2) + The number of strike prices above and below the stock price that + should be taken + + Returns + ------- + chopped: DataFrame + The resultant DataFrame chopped down to be 2 * above_below + 1 rows + desired. If there isn't data as far out as the user has asked for + then + """ + price = get_quote_yahoo(['aapl'])['last'] + start_index = np.where(opt_df['Strike'] > price)[0][0] + + get_range = range(start_index - above_below, + start_index + above_below + 1) + + chopped = opt_df.ix[get_range, :] + + chopped = chopped.dropna() + chopped = chopped.reset_index() + + return chopped + + def get_forward_data(self, months, call=True, put=False): + """ + Gets either call, put, or both data for months starting in the current + month and going out in the future a spcified amount of time. + + Parameters + ---------- + months: number, int + How many months to go out in the collection of the data. This is + inclusive. + + call: bool, optional (default=True) + Whether or not to collect data for call options + + put: bool, optional (default=False) + Whether or not to collect data for put options. + + Returns + ------- + all_calls: DataFrame + If asked for, a DataFrame containing call data from the current + month to the current month plus months. + + all_puts: DataFrame + If asked for, a DataFrame containing put data from the current + month to the current month plus months. + """ + in_months = range(cur_month, cur_month + months + 1) + in_years = [cur_year] * months + + # Figure out how many items in in_months go past 12 + to_change = 0 + for i in range(months): + if in_months[i] > 12: + in_months[i] -= 12 + to_change += 1 + + # Change the corresponding items in the in_years list. + for i in range(1, to_change + 1): + in_years[-i] += 1 + + if call: + all_calls = DataFrame() + for mon in range(months): + try: # This catches cases when there isn't data for a month + call_frame = self.get_call_data(in_months[mon], + in_years[mon]) + tick = str(call_frame.ix[0, 1]) + start = len(self.symbol) + year = tick[start: start + 2] + month = tick[start + 2: start + 4] + day = tick[start + 4: start + 6] + expiry = str(month + '-' + day + '-' + year) + call_frame['Expiry'] = expiry + if mon == 0: + all_calls = all_calls.join(call_frame, how='right') + else: + all_calls = concat([all_calls, call_frame]) + except: + pass + + if put: + all_puts = DataFrame() + for mon in range(months): + try: # This catches cases when there isn't data for a month + put_frame = self.get_put_data(in_months[mon], + in_years[mon]) + + # Add column with expiry data to this frame. + tick = str(put_frame.ix[0, 1]) + start = len(self.symbol) + year = tick[start: start + 2] + month = tick[start + 2: start + 4] + day = tick[start + 4: start + 6] + expiry = str(month + '-' + day + '-' + year) + put_frame['Expiry'] = expiry + + if mon == 0: + all_puts = all_puts.join(put_frame, how='right') + else: + all_puts = concat([all_puts, put_frame]) + except: + pass + + if call and put: + return [all_calls, all_puts] + else: + if call: + return all_calls + else: + return all_puts \ No newline at end of file From 8731062a24a23bf6ef2cfd258ce7eab86676c4da Mon Sep 17 00:00:00 2001 From: Spencer Lyon Date: Wed, 8 Aug 2012 21:09:32 -0700 Subject: [PATCH 2/5] Cleaned up docstrings I left some things in the docstrings for get_options_data, get_calls_data, and get_puts_data. --- pandas/io/data.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index 4255191729..a60e2a51de 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -274,12 +274,6 @@ def get_options_data(self, month=cur_month, year=cur_year): year: number, int The year the options expire. - excel: bool, optional(default=False) - A boolean value indicating whether or not the data should be saved - to an excel spreadsheet. If true the name of the file will be - "'ticker'_options.xlsx" unless otherwise indicated. Also there will - be two sheets created. The first one is named 'calls' and contains - the call data and the second is for the puts. Returns ------- @@ -321,20 +315,10 @@ def get_call_data(self, month=cur_month, year=cur_year): year: number, int The year the options expire. - excel: bool, optional(default=False) - A boolean value indicating whether or not the data should be saved - to an excel spreadsheet. If true the name of the file will be - "'ticker'_options.xlsx" unless otherwise indicated. Also there will - be two sheets created. The first one is named 'calls' and contains - the call data and the second is for the puts. - Returns ------- call_data: pandas.DataFrame A DataFrame with call options data. - - put_data: pandas.DataFrame - A DataFrame with call options data. """ mon_in = month if len(str(month)) == 2 else str('0' + str(month)) @@ -366,13 +350,6 @@ def get_put_data(self, month=cur_month, year=cur_year): year: number, int The year the options expire. - excel: bool, optional(default=False) - A boolean value indicating whether or not the data should be saved - to an excel spreadsheet. If true the name of the file will be - "'ticker'_options.xlsx" unless otherwise indicated. Also there will - be two sheets created. The first one is named 'calls' and contains - the call data and the second is for the puts. - Returns ------- put_data: pandas.DataFrame From b514d8309715808e02cbf4d69aa8a36aa4e4f15e Mon Sep 17 00:00:00 2001 From: Spencer Lyon Date: Wed, 8 Aug 2012 21:11:24 -0700 Subject: [PATCH 3/5] Final cleansing of docstrings --- pandas/io/data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index a60e2a51de..726fccf10c 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -269,7 +269,7 @@ def get_options_data(self, month=cur_month, year=cur_year): Parameters ---------- month: number, int - The month of the options expire. + The month the options expire. year: number, int The year the options expire. @@ -310,7 +310,7 @@ def get_call_data(self, month=cur_month, year=cur_year): Parameters ---------- month: number, int - The month of the options expire. + The month the options expire. year: number, int The year the options expire. @@ -345,7 +345,7 @@ def get_put_data(self, month=cur_month, year=cur_year): Parameters ---------- month: number, int - The month of the options expire. + The month the options expire. year: number, int The year the options expire. From 059047692bc53517d1826b206ad8c9ceab28001f Mon Sep 17 00:00:00 2001 From: Spencer Lyon Date: Thu, 9 Aug 2012 09:30:35 -0700 Subject: [PATCH 4/5] Found error in get_near_stock_price(). It was getting the stock price only for aapl instead of self.symbol --- pandas/io/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index 726fccf10c..05461846eb 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -393,7 +393,7 @@ def get_near_stock_price(self, opt_df, above_below=2): desired. If there isn't data as far out as the user has asked for then """ - price = get_quote_yahoo(['aapl'])['last'] + price = get_quote_yahoo([self.symbol])['last'] start_index = np.where(opt_df['Strike'] > price)[0][0] get_range = range(start_index - above_below, From 5683611c3a79e211a0441402c7abee595f9ccff9 Mon Sep 17 00:00:00 2001 From: Spencer Lyon Date: Fri, 17 Aug 2012 11:49:15 -0600 Subject: [PATCH 5/5] Made BeautifulSoup import local and made Options class inherit from object. Changes made based on discussion in pull request. Signed-off-by: Spencer Lyon --- pandas/io/data.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index 05461846eb..ab4a95d37a 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -11,7 +11,6 @@ import time from zipfile import ZipFile -from BeautifulSoup import BeautifulSoup from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str from pandas import DataFrame, read_csv, concat @@ -225,7 +224,7 @@ def _parse_options_data(table): return TextParser(data, names=header).get_chunk() -class Options(): +class Options(object): """ This class fetches call/put data for a given stock/exipry month. @@ -283,6 +282,7 @@ def get_options_data(self, month=cur_month, year=cur_year): put_data: pandas.DataFrame A DataFrame with call options data. """ + from BeautifulSoup import BeautifulSoup mon_in = month if len(str(month)) == 2 else str('0' + str(month)) @@ -320,6 +320,7 @@ def get_call_data(self, month=cur_month, year=cur_year): call_data: pandas.DataFrame A DataFrame with call options data. """ + from BeautifulSoup import BeautifulSoup mon_in = month if len(str(month)) == 2 else str('0' + str(month)) @@ -355,6 +356,7 @@ def get_put_data(self, month=cur_month, year=cur_year): put_data: pandas.DataFrame A DataFrame with call options data. """ + from BeautifulSoup import BeautifulSoup mon_in = month if len(str(month)) == 2 else str('0' + str(month)) @@ -496,4 +498,4 @@ def get_forward_data(self, months, call=True, put=False): if call: return all_calls else: - return all_puts \ No newline at end of file + return all_puts