diff --git a/README.md b/README.md new file mode 100644 index 0000000..8eecada --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +#EA-Tools 2014 +This repository contains a flask app which manages processes for VIP QA and DECC through the 2014 Election. VIP-specific scripts are stored in the vip folder, and decc-specific scripts are stored in the decc folder. Each folder, except app, contains its own README detailing the included scripts. + + +##For this app to run, the following environment variables must be defined: +###For DECC: ++ *DECCINPUT:* the directory to use to input to decc scripts ++ *DECCOUTPUT:* the directory to output decc files after processing ++ *PGHOST:* the URL or IP of the DECC database server ++ *PGUSER:* the username to connect to the DECC database ++ *PGPASSWORD:* the password associated with PGUSER ++ *PGDB:* the DECC database name + +###For VIP QA: ++ *GOOGLE_NATIVE_APP_CLIENT_ID:* The client ID associated with the VIP QA app ++ *GOOGLE_NATIVE_APP_CLIENT_SECRET:* The client secret associated with the VIP QA app ++ *GOOGLE_PUBLIC_API_KEY:* The API key used to query the Google civicInfo API ++ *GOOGLE_GEOCODE_API_KEY:* The API key used to query the Google geocode API ++ *VIPQADATA:* The directory containing TargetSmart PII spreadsheets used to QA election-day voting sites ++ *EVIPQADATA:* The directory containing TargetSmart PII spreadsheets used to QA early voting sites diff --git a/config.py b/config.py index 094127c..07b553f 100644 --- a/config.py +++ b/config.py @@ -1,12 +1,13 @@ -from oauth2client.client import OAuth2WebServerFlow import os -api_id = os.getenv('GOOGLE_NATIVE_APP_CLIENT_ID') -api_secret = os.getenv('GOOGLE_NATIVE_APP_CLIENT_SECRET') -api_key = os.getenv('GOOGLE_PUBLIC_API_KEY') -geokey = os.getenv('GOOGLE_GEOCODE_API_KEY') -vip_qa_data = os.getenv('VIPQADATA') -ev_qa_data = os.getenv('EVIPQADATA') +#This import is needed for VIP +from oauth2client.client import OAuth2WebServerFlow + +#These are generally useful 
+CSRF_ENABLED = True +SECRET_KEY = os.getenv('SECRET_KEY') + +#These variables configure the DECC scripts deccinputdir = os.getenv('DECCINPUT') deccoutputdir = os.getenv('DECCOUTPUT') HOST = os.getenv('PGHOST') @@ -14,15 +15,20 @@ DB = os.getenv('PGDB') PASSWORD = os.getenv('PGPASSWORD') -CSRF_ENABLED = True -SECRET_KEY = os.getenv('SECRET_KEY') - +#These are all VIP Variables +api_id = os.getenv('GOOGLE_NATIVE_APP_CLIENT_ID') +api_secret = os.getenv('GOOGLE_NATIVE_APP_CLIENT_SECRET') +api_key = os.getenv('GOOGLE_PUBLIC_API_KEY') +geokey = os.getenv('GOOGLE_GEOCODE_API_KEY') +vip_qa_data = os.getenv('VIPQADATA') +ev_qa_data = os.getenv('EVIPQADATA') states = {'AL': 'Alabama', 'AR': 'Arkansas', 'AZ': 'Arizona', 'ME': 'Maine', 'NH': 'New Hampshire', 'TN': 'Tennessee', 'LA': 'Louisiana', 'IL': 'Illinois', 'IN': 'Indiana', 'ID': 'Idaho', 'GA': 'Georgia', 'MA': 'Massachusetts', 'SD': 'South Dakota', 'VT': 'Vermont', - 'FL': 'Florida', 'MS': 'Mississippi'} - + 'FL': 'Florida', 'MS': 'Mississippi', 'KY': 'Kentucky', + 'TX': 'Texas', 'SC': 'South Carolina', 'WV': 'West Virginia', + 'NM': 'New Mexico'} scope1 = 'https://spreadsheets.google.com/feeds' scope2 = 'https://www.googleapis.com/auth/drive' scope = '{0} {1}'.format(scope1, scope2) @@ -31,6 +37,5 @@ client_secret=api_secret, scope=scope, redirect_uri=redirect) - vipTemplateKey = '1qcqHBizQeFJwXsORMS_QS59gywuT9TRifwQe4BM_G3E' evTemplateKey = '1_uEKMFrFxfu69Ws-2QbmUPm1kFNMY5txGJzG8bfzK4s' diff --git a/decc/README.md b/decc/README.md new file mode 100644 index 0000000..5917a1d --- /dev/null +++ b/decc/README.md @@ -0,0 +1,34 @@ +#DECC Processing Scripts + +These scripts handle decc processing as managed by the Flask app. Each script is detailed below + ++ *processScans.py* processes newly received digital or physical orders. + + *findClients()* accepts a psycopg2 cursor object and queries the DECC database to list clients. + + *getProject()* accepts a client ID and psycopg2 cursor object. 
It queries the DECC database and returns a list of projects associated with the provided client ID. + + *findOrders()* accepts a project ID and psycopg2 cursor object. It queries the DECC database and returns a list of orders associated with the provided project ID. + + *createOrder()* accepts a project ID and psycopg2 cursor object. It inserts a new order record into the DECC database. + + *findTypes()* accepts a project ID and psycopg2 cursor object. It queries the DECC database and returns a list of form types associated with the provided project ID. + + *createPart()* accepts an order ID, type ID, state, booleans indicating whether the order is rush, will be uploaded to VAN, matched to vendor, or sent to quad, a psycopg2 cursor object, and a psycopg2 db connection object. It inserts a new part record into the DECC database and returns the part ID. + + *obtainStartNum()* accepts a client ID and psycopg2 cursor object. It queries the DECC database and returns the next batch number associated with the given client. + + *processPDF()* is run for digitally transmitted orders. It accepts an input directory, output directory, starting batch number, part ID, psycopg2 cursor object, and psycopg2 database connection object. It iterates over every file listed recursively in the input directory and inserts a new batch record (including total pages) in the DECC database for each. It returns the ending batch number and the total number of pages processed. + + *processPhysical()* is run for physically shipped orders. It accepts an input file, output file, part ID, starting batch number, psycopg2 database connection object, psycopg2 cursor object, and order ID. It reads the input file and creates a new batch record for each row using the 'Batch Name' column in the input file. It then writes out all batches created with name and ID. + + *getCursor()* accepts a host, database, username, and password, and returns a psycopg2 cursor object and a psycopg2 database connection object. 
+ + ++ *processXLSX.py* processes returned data from the Data-entry vendor. + + *getBatches()* accepts a psycopg2 cursor object and returns a dictionary listing all DECC batch information from the DECC database. + + *writeFile()* accepts a list of row dictionaries to be written, an output filename, and a list of headers. It writes out the list of dictionaries with the given headers to the output filename. + + *processXLSX()* accepts an input filename referencing an Excel file, a psycopg2 database connection object, and a psycopg2 cursor object. It reads in the excel file, and iterates over each row matching to its original batch name. It then updates batch entries with the final number of records. + + *main()* accepts a boolean indicating whether a file contains VR records, an input filename, and an output filename. It connects to the DECC database, reads in the input file, runs processXLSX, and calls vrqc.py if the file is voter registration. It then outputs to the output file. + ++ *vrqc.py* runs quality checks on returned voter registration data. + + *readCSV()* accepts a filename and returns a list of dictionaries containing data for each row. + + *writeCSV()* accepts a list of dictionaries containing row data, an output filename, and a list of headers. It writes the list of dictionaries out to the output filename using the list of headers. + + *getFIPS()* accepts a URL containing FIPS code translation data. It returns a dictionary mapping FIPS codes to county names, and a dictionary mapping county names to state abbreviations. + + *getZipURL()* accepts the URL of the page listing HUD zip-FIPS code mapping files, and obtains the URL of the most recent HUD file mapping Zip Codes to county FIPS codes. + + *getZips()* accepts a URL of a HUD file mapping zip codes to county FIPS codes. It returns a list of dictionaries with keys ZIP and FIPS. 
+ + *buildZipTranslator()* accepts the FIPS dictionary created in getFIPS(), and the list created by getZips() and creates a single dictionary with zip codes as keys, and as values, a list of dictionaries with STATE and COUNTY as keys + + *inspectRows()* accepts the list of row dictionaries, the zip translator, and the stateDict. It iterates over each row of voter registration data and checks whether the data included make any sense. It returns an updated list of row dictionaries, and an aggregate report. + + *report()* writes out as JSON the object passed to it as an argument. + + *concatenateFields()* concatenates the values for addresses and dates to create values that are more acceptable to VAN. + + *run()* accepts as argument the list of Dictionaries from processXLSX.py, and returns a final QC'd version of that list. diff --git a/decc/processScans.py b/decc/processScans.py index 3237437..28bfb21 100644 --- a/decc/processScans.py +++ b/decc/processScans.py @@ -19,8 +19,8 @@ def findClients(cursor): def getProject(clientID, cursor): cursor.execute('''SELECT project_id - FROM decc_form_client - WHERE id = {0} + FROM decc_form_client + WHERE id = {0} '''.format(clientID)) value = cursor.fetchall()[0][0] return value @@ -67,9 +67,9 @@ def createPart(orderID, typeID, state, rush, van, match, quad, cursor, db): match)) db.commit() cursor.execute('''SELECT MAX(id) - FROM decc_form_part - WHERE order_id = {0} - '''.format(orderID)) + FROM decc_form_part + WHERE order_id = {0} + '''.format(orderID)) result = cursor.fetchall()[0][0] return result diff --git a/vip/FL.py b/vip/FL.py index a9c0388..fb91853 100644 --- a/vip/FL.py +++ b/vip/FL.py @@ -14,7 +14,11 @@ def getValues(row): city = row['vf_reg_cass_city'] zipcode = row['vf_reg_cass_zip'] county = row['vf_county_name'] - return num, predir, name, suffix, postdir, city, zipcode, county + date = str(row['voterbase_dob']) + lastName = row['tsmart_last_name'] + if len(date) == 8: + dob = '{0}/{1}/{2}'.format(date[4:6], 
date[6:8], date[:4]) + return num, predir, name, suffix, postdir, city, zipcode, county, dob, lastName def getHiddenValues(form): @@ -24,15 +28,6 @@ def getHiddenValues(form): return fields -def getCounties(soup): - counties = {} - selectName = 'ctl00$ContentPlaceHolder1$usrCounty$cboCounty' - select = soup.find('select', {'name': selectName}) - for item in select.find_all('option'): - counties[item.text.strip().upper()] = item.get('value') - return counties - - def matchString(string, stringList): maximum = 0 string = str(string.strip().upper()) @@ -160,35 +155,59 @@ def getLee(num, predir, name, suffix, postdir, zipcode): return ppid, name, address +def electionsFL(lastName, dob, num, county): + url = 'https://www.electionsfl.org/VoterInfo/asmx/service1.asmx/' + county = county.replace(' ', '').lower() + header = {'Content-Type': 'application/json; charset=UTF-8'} + session = Session() + payload = {'LastName': lastName, 'BirthDate': dob, 'StNumber': num, + 'County': county, 'FirstName': '', 'challengeValue': '', + 'responseValue': ''} + response = session.post(url + 'FindVoter', data=json.dumps(payload), + headers=header) + print response.text + voterID = str(json.loads(json.loads(response.text)['d'])[0]['FVRSVoterIdNumber']) + payload = {'FVRSVoterIDNumber': voterID, 'CurCounty': county} + response = session.post(url + 'GetElectionInfo', data=json.dumps(payload), + headers=header) + print response.text + data = json.loads(json.loads(response.text)['d'])[0] + ppid = '' + name = data['place_name'] + address = data['office_location'] + return ppid, name, address + + def run(row): - num, predir, name, suffix, postdir, city, zipcode, county = getValues(row) - while True: - try: - if county.upper() == 'PALM BEACH': - url = 'https://www.pbcelections.org/' - eid = '139' - pollingInfo = precinctFinder(url, num, predir, name, suffix, - postdir, city, zipcode, eid) - elif county.upper() == 'SARASOTA': - url = 'https://www.sarasotavotes.com/' - eid = '82' - pollingInfo = 
precinctFinder(url, num, predir, name, suffix, - postdir, city, zipcode, eid) - elif county.upper() == 'VOLUSIA': - fullcounty = 'volusia' - pollingInfo = voterFocus(num, predir, name, suffix, postdir, - city, zipcode, fullcounty) - elif county.upper() == 'OSCEOLA': - fullcounty = 'osceola' - pollingInfo = voterFocus(num, predir, name, suffix, postdir, - city, zipcode, fullcounty) - elif county.upper() == 'LEE': - pollingInfo = getLee(num, predir, name, suffix, postdir, - zipcode) - else: - return '', '', '' - return pollingInfo - except Exception as inst: - print type(inst) - print inst + num, predir, name, suffix, postdir, city, zipcode, county, dob, lastName = getValues(row) + try: + if county.upper() == 'PALM BEACH': + url = 'https://www.pbcelections.org/' + eid = '139' + pollingInfo = precinctFinder(url, num, predir, name, suffix, + postdir, city, zipcode, eid) + elif county.upper() == 'SARASOTA': + url = 'https://www.sarasotavotes.com/' + eid = '82' + pollingInfo = precinctFinder(url, num, predir, name, suffix, + postdir, city, zipcode, eid) + elif county.upper() == 'VOLUSIA': + fullcounty = 'volusia' + pollingInfo = voterFocus(num, predir, name, suffix, postdir, + city, zipcode, fullcounty) + elif county.upper() == 'OSCEOLA': + fullcounty = 'osceola' + pollingInfo = voterFocus(num, predir, name, suffix, postdir, + city, zipcode, fullcounty) + elif county.upper() == 'LEE': + pollingInfo = getLee(num, predir, name, suffix, postdir, + zipcode) + elif county.upper() == 'ST LUCIE' or county.upper() == 'LAKE': + pollingInfo = electionsFL(lastName, dob, num, county) + else: return '', '', '' + return pollingInfo + except Exception as inst: + print type(inst) + print inst + return '', '', '' diff --git a/vip/KY.py b/vip/KY.py new file mode 100644 index 0000000..d010459 --- /dev/null +++ b/vip/KY.py @@ -0,0 +1,127 @@ +from bs4 import BeautifulSoup +from requests import Session +import Levenshtein +import json + + +def getValues(row): + num = 
row['vf_reg_cass_street_num'] + predir = row['vf_reg_cass_pre_directional'] + name = row['vf_reg_cass_street_name'] + suffix = row['vf_reg_cass_street_suffix'] + postdir = row['vf_reg_cass_post_directional'] + county = row['vf_county_name'] + return num, predir, name, suffix, postdir, county + + +def getHiddenValues(form): + fields = {} + for item in form.find_all('input', {'type': 'hidden'}): + fields[item.get('name')] = item.get('value') + return fields + + +def getCounties(soup): + counties = {} + selectName = 'ctl00$ContentPlaceHolder1$usrCounty$cboCounty' + select = soup.find('select', {'name': selectName}) + for item in select.find_all('option'): + counties[item.text.strip().upper()] = item.get('value') + return counties + + +def matchString(string, stringList): + maximum = 0 + string = str(string.strip().upper()) + optionList = [] + for text in stringList: + newstring = str(text.strip().upper()) + score = Levenshtein.ratio(string, newstring) + maximum = max(maximum, score) + optionList.append((score, text)) + for option in optionList: + if maximum == option[0]: + return str(option[1]) + + +def processBlanks(value, replacement): + if value == '': + value = replacement + return value + + +def getJefferson(num, predir, name, suffix, postdir): + url = 'http://www.jeffersoncountyclerk.org/WhereDoIVote/Default.aspx' + addrStr = '{0} {1} {2} {3} {4}'.format(num, predir, name, suffix, postdir) + addrStr = addrStr.strip().replace(' ', ' ').replace(' ', ' ') + session = Session() + data = {'count': 20, 'prefixText': addrStr} + header = {'Content-Type': 'application/json; charset=UTF-8'} + response = session.post(url + '/GetAddress', data=json.dumps(data), + headers=header) + data = json.loads(response.text) + addresses = data['d'] + address = matchString(addrStr, addresses) + html = session.get(url).text + fields = getHiddenValues(BeautifulSoup(html).find('form')) + fields['txtStreet'] = address + fields['cmdDisplay'] = 'Search' + response = session.post(url, 
data=fields) + soup = BeautifulSoup(response.text) + name = '' + address = '' + ppid = '' + nameLabel = soup.find('span', {'id': 'lblLocation'}) + addressLabel = soup.find('span', {'id': 'lblAddress'}) + if nameLabel is not None: + name = nameLabel.string.strip() + if addressLabel is not None: + address = addressLabel.string.strip() + address += ' LOUISVILLE, KY' + return ppid, name, address + + +def getFayette(num, predir, name, suffix): + url = 'https://www.fayettecountyclerk.com/web/elections/votingLocationsResults.htm' + session = Session() + fields = {'streetInNumber': num, 'streetInDir': predir, + 'streetInName': name, 'streetInType': suffix} + response = session.post(url, data=fields) + soup = BeautifulSoup(response.text) + table = soup.find('table', {'cellpadding': '2'}).find_all('tr') + precinctDict = {} + for row in table: + cells = row.find_all('td') + label = cells[0].get_text().strip() + value = cells[1].get_text().strip() + precinctDict[label] = value + ppid = '' + name = '' + address = '' + if 'Precinct Code:' in precinctDict: + ppid = precinctDict['Precinct Code:'].strip() + if 'Voting Location:' in precinctDict: + name = precinctDict['Voting Location:'] + name = name.replace('- View on Map', '').strip() + if 'Precinct Address:' in precinctDict: + address = precinctDict['Precinct Address:'].strip() + if 'Precinct Zip Code:' in precinctDict: + address += ' LEXINGTON, KY ' + address += precinctDict['Precinct Zip Code:'].strip() + return ppid, name, address + + +def run(row): + num, predir, name, suffix, postdir, county = getValues(row) + try: + if county.upper() == 'JEFFERSON': + pollingInfo = getJefferson(num, predir, name, suffix, postdir) + elif county.upper() == 'FAYETTE': + pollingInfo = getFayette(num, predir, name, suffix) + else: + return '', '', '' + return pollingInfo + except Exception as inst: + print type(inst) + print inst + return '', '', '' diff --git a/vip/MS.py b/vip/MS.py index fcd3fab..b4e0cb5 100644 --- a/vip/MS.py +++ b/vip/MS.py 
@@ -94,13 +94,13 @@ def query(session, city, num, name, suffix, addrStr): def run(row): - while True: - try: - session = Session() - city, num, name, suffix, addrStr = getValues(row) - ppData = query(session, city, num, name, suffix, addrStr) - pollingInfo = getOutputValues(ppData) - return pollingInfo - except Exception as inst: - print type(inst) - print inst + try: + session = Session() + city, num, name, suffix, addrStr = getValues(row) + ppData = query(session, city, num, name, suffix, addrStr) + pollingInfo = getOutputValues(ppData) + return pollingInfo + except Exception as inst: + print type(inst) + print inst + return '', '', '' diff --git a/vip/NM.py b/vip/NM.py new file mode 100644 index 0000000..9227755 --- /dev/null +++ b/vip/NM.py @@ -0,0 +1,107 @@ +from bs4 import BeautifulSoup +import urllib2 +import re +import requests + + +def getFormSoup(url): + response = urllib2.urlopen(url) + soup = BeautifulSoup(response.read()) + return soup + + +def getCounties(soup): + countyDict = {} + countySelector = soup.find('select', {'name': 'county'}) + countyOptions = countySelector.find_all('option') + for county in countyOptions: + name = county.text + code = county.get('value') + countyDict[name] = code + return countyDict + + +def generatePayload(county, addrStr, zipcode, counties): + payload = { + 'action': 'Search', + 'countyRequired': 'true', + 'selectSearchCriteria': '2', + 'county': counties[county.upper()], + 'electionCombo': '20726_200000', + 'nameLast': '', + 'dobMonth': '0', + 'dobDay': '0', + 'dobYear': '0', + 'voterId': '', + 'DLN': '', + 'address': addrStr, + 'zipcode': zipcode, + 'search': 'Search' + } + return payload + + +def getResponseSoup(payload, url): + response = requests.post(url, data=payload, verify=False) + soup = BeautifulSoup(response.text) + return soup + + +def getPollingPlace(soup): + pollingPlaces = soup.find_all('div', {'id': 'polling-place'}) + ppid = '' + address = '' + name = '' + if len(pollingPlaces) > 0: + pollingDict = {} 
+ election = pollingPlaces[0] + labels = election.find_all('span', {'class': 'label'}) + data = election.find_all('span', {'class': 'data'}) + for i in range(len(labels)): + pollingDict[labels[i].text] = data[i].text + zip = '' + street = '' + city = '' + defaultPP = pollingPlaces[len(pollingPlaces) - 1] + labels = defaultPP.find_all('span', {'class': 'label'}) + data = defaultPP.find_all('span', {'class': 'data'}) + for i in range(len(labels)): + if re.search('Zip', labels[i].text): + zip = data[i].text + else: + pollingDict[labels[i].text] = data[i].text + name = pollingDict['Name'] + if 'Ballot Style' in pollingDict: + ppid = pollingDict['Ballot Style'] + if 'Address' in pollingDict: + street = pollingDict['Address'] + if 'City' in pollingDict: + city = pollingDict['City'] + address = '{0} {1}, {2} {3}'.format(street, city, 'NM', zip + ).replace(' ', ' ' + ).replace(' ', ' ') + return ppid, name, address + + +def getValues(row): + county = row['vf_county_name'] + num = row['vf_reg_cass_street_num'] + predir = row['vf_reg_cass_pre_directional'] + name = row['vf_reg_cass_street_name'] + suffix = row['vf_reg_cass_street_suffix'] + postdir = row['vf_reg_cass_post_directional'] + zipcode = row['vf_reg_cass_zip'] + addrStr = '{0} {1} {2} {3} {4}'.format(num, predir, name, suffix, postdir) + addrStr.strip().replace(' ', ' ').replace(' ', ' ') + return county, addrStr, zipcode + + +def run(row): + county, addrStr, zipcode = getValues(row) + baseURL = 'https://voterview.state.nm.us/VoterView/PollingPlaceSearch.do' + form = getFormSoup(baseURL) + counties = getCounties(form) + payload = generatePayload(county, addrStr, zipcode, counties) + response = getResponseSoup(payload, baseURL) + pollingInfo = getPollingPlace(response) + return pollingInfo diff --git a/vip/README.md b/vip/README.md new file mode 100644 index 0000000..2aa4c6e --- /dev/null +++ b/vip/README.md @@ -0,0 +1,34 @@ +#VIP QA Scripts +This folder contains the scripts which QA early and election-day voting 
sites. Each of the scripts is summarized below. + + ++ *readData.py* contains a very simple function (read()) which accepts a directory and a state abbreviation. Reads in a csv named {state abbreviation}.csv from the provided directory. + + ++ *civicInfo.py* interacts with the Google CivicInfo API. Contains 3 functions. + + *getVoterInfo()* returns a decoded json object for the provided address string and electionID, by default the electionID is set to 4100-the 2014 General Election. + + *getVIPValues()* accepts the object provided by getVoterInfo() and returns the election day polling location details. + + *getEVValues()* accepts the object provided by getVoterInfo() and returns the early-vote polling location details. + + ++ *sheets.py* interacts with the Google Drive and Spreadsheets APIs and includes functions which can be used to copy a Google Sheet and then write rows to it. + + *getService()* accepts as arguments the service name, version, and credential object. It creates and authorizes a service which can be used with the Google API client library. + + *copySheet()* accepts as arguments a sheet name, an original sheet key, and a credentials object. It uses the Google API client library (and calls getService()) to copy a sheet template. + + *getClient()* creates a gData spreadsheets client and authorizes it using stored credentials (created in app/views.py) + + *convertRow()* accepts a dictionary with row values. It replaces the keys in the dictionary with lower-case, spaceless versions of themselves. + + *writeRow()* accepts a dictionary with row values, a gdata client, a sheetKey, and sheetID (sheet within the google spreadsheet). It converts the input dictionary using convertRow() and then writes it to the sheet provided. + + ++ *geocode.py* geocodes and finds the distance between address strings. + + *geocode()* accepts a string containing an address and returns a dictionary with the latitude (lat) and longitude (lng) of the best match for that address. 
+ + *haversine()* accepts two location dictionaries and returns the haversine distance (in miles) between them. + + ++ The scripts named *{state abbreviation}.py* each have a different structure based on the state on which they work. The unifying factor is a function titled *run()* which accepts a row dictionary from the TargetSmart data and returns the Polling Place ID, name, and address. + + ++ *execute.py* calls all of the other scripts in this folder to read and process VIP QA data. + + *getRowData()* accepts a row dictionary from TargetSmart data, and returns an address string and county which can be used to query the civicInfo API. + + *VIP()* accepts a state abbreviation and credentials object which reads in that state's TargetSmart data, creates a Google Spreadsheet, and then iterates over the data to query the civicInfo API, query the state lookup tool, and write the returned data to the new Google Spreadsheet. + + *EVIP()*: accepts a state abbreviation, a credentials object, and a boolean indicating whether to run 25 or 50 rows. This function reads in state Data, creates a google spreadsheet, and then iterates over rows querying the Google CivicInfo API and writing returned data to the new spreadsheet. No SOS tools were created for EV data. 
+ diff --git a/vip/SC.py b/vip/SC.py new file mode 100644 index 0000000..c28af18 --- /dev/null +++ b/vip/SC.py @@ -0,0 +1,81 @@ +from bs4 import BeautifulSoup +from requests import Session + + +def getValues(row): + fname = row['tsmart_first_name'] + lname = row['tsmart_last_name'] + county = row['vf_county_name'] + dobStr = row['voterbase_dob'] + dob = '{0}/{1}/{2}'.format(int(dobStr[4:6]), int(dobStr[6:8]), + int(dobStr[:4])) + return fname, lname, dob, county + + +def getCounties(soup): + counties = {} + selectName = 'ctl00$cphMain$ddlCounty$input' + select = soup.find('select', {'name': selectName}) + for item in select.find_all('option'): + counties[item.text.strip().upper()] = item.get('value') + return counties + + +def getOutputValues(soup): + ppid = '' + name = '' + address = '' + baseName = 'ctl00_cphMain_VoterInfoUserControl_{0}_DisplayOnly' + ppidSoup = soup.find('span', {'id': baseName.format('VotingPrecinctControl')}) + nameSoup = soup.find('span', {'id': baseName.format('PrecinctLocationControl')}) + addressSoup = soup.find('span', {'id': baseName.format('PrecinctAddress')}) + if ppidSoup is not None: + ppid = ppidSoup.get_text().strip() + name = nameSoup.get_text().strip() + address = addressSoup.get_text().strip() + return ppid, name, address + + +def getHiddenValues(soup): + form = soup.find('form', {'name': 'aspnetForm'}) + fields = {} + for item in form.find_all('input', {'type': 'hidden'}): + fields[item.get('name')] = item.get('value') + return fields + + +def query(fname, lname, dob, county, fields, counties, formURL, session): + resultsURL = 'https://info.scvotes.sc.gov/Eng/VoterInquiry/' + resultsURL += 'VoterInformation.aspx' + county = counties[county.upper()] + fields['ctl00$cphMain$txtFirstName$input'] = fname + fields['ctl00$cphMain$txtLastName$input'] = lname + fields['ctl00$cphMain$dobDateOfBirth$input'] = dob + fields['ctl00$cphMain$ddlCounty$input'] = county + fields['ctl00$buttonContent$txtHiddenCountyValue'] = county + 
fields['ctl00$buttonContent$btnSubmit'] = 'Submit' + session.post(formURL, data=fields) + response = session.get(resultsURL) + html = response.text + return html + + +def run(row): + formURL = 'https://info.scvotes.sc.gov/eng/voterinquiry/' + formURL += 'VoterInformationRequest.aspx?PageMode=VoterInfo' + try: + session = Session() + fname, lname, dob, county = getValues(row) + response = session.get(formURL) + soup = BeautifulSoup(response.text) + hiddenFields = getHiddenValues(soup) + counties = getCounties(soup) + html = query(fname, lname, dob, county, hiddenFields, counties, + formURL, session) + soup = BeautifulSoup(html) + pollingInfo = getOutputValues(soup) + return pollingInfo + except Exception as inst: + print type(inst) + print inst + return '', '', '' diff --git a/vip/TX.py b/vip/TX.py new file mode 100644 index 0000000..f989491 --- /dev/null +++ b/vip/TX.py @@ -0,0 +1,404 @@ +from bs4 import BeautifulSoup +from requests import Session +import Levenshtein +import re +import json + + +def getValues(row): + num = row['vf_reg_cass_street_num'] + predir = row['vf_reg_cass_pre_directional'] + name = row['vf_reg_cass_street_name'] + suffix = row['vf_reg_cass_street_suffix'] + postdir = row['vf_reg_cass_post_directional'] + city = row['vf_reg_cass_city'] + zipcode = row['vf_reg_cass_zip'] + county = row['vf_county_name'] + date = str(row['voterbase_dob']) + lastName = row['tsmart_last_name'] + firstName = row['tsmart_first_name'] + dob = '' + if len(date) == 8: + dob = '{0}/{1}/{2}'.format(date[4:6], date[6:8], date[:4]) + return num, predir, name, suffix, postdir, city, zipcode, county, dob, firstName, lastName + + +def getHiddenValues(form): + fields = {} + for item in form.find_all('input', {'type': 'hidden'}): + fields[item.get('name')] = item.get('value') + return fields + + +def matchString(string, stringList): + maximum = 0 + string = str(string.strip().upper()) + optionList = [] + for text in stringList: + newstring = str(text[0].strip().upper()) + 
score = Levenshtein.ratio(string, newstring) + maximum = max(maximum, score) + optionList.append((score, text[1])) + for option in optionList: + if maximum == option[0]: + return str(option[1]) + + +def processBlanks(value, replacement): + if value == '': + value = replacement + return value + + +def getBexar(num, predir, name, suffix, postdir, zipcode): + session = Session() + url = 'http://apps.bexar.org/ElectionSearch/ElectionSearch.aspx?psearchtab=1' + response = session.get(url) + soup = BeautifulSoup(response.text) + form = soup.find('form', {'id': 'form1'}) + fields = getHiddenValues(form) + addrStr = '{0} {1} {2} {3} {4}'.format(num, predir, name, suffix, postdir) + addrStr = addrStr.strip().replace(' ', ' ').replace(' ', ' ') + fields['tab2street'] = addrStr + fields['tab2zipcode'] = zipcode + fields['btnTab2'] = 'Search' + response = session.post(url, data=fields) + soup = BeautifulSoup(response.text) + results = str(soup.find('div', {'id': 'DivResultFound'}).find('h3')) + ppid = re.sub('^.* ([Pp][Rr][Ee][Cc][Ii][Nn][Cc][Tt] [0-9A-Za-z]*) .*$', + '\\1', results.replace('\n', '')) + resultList = results.split('
') + name = resultList[3].strip() + address = resultList[4].strip() + return ppid, name, address + + +def getHarris(lastName, firstName, num, name): + url = 'http://www.harrisvotes.org/VoterBallotSearch.aspx?L=E' + session = Session() + response = session.get(url) + soup = BeautifulSoup(response.text, 'lxml') + form = soup.find('form') + fields = getHiddenValues(form) + baseName = 'ctl00$ContentPlaceHolder1$' + fields[baseName + 'txtLastName'] = lastName + fields[baseName + 'txtFirstName'] = firstName + fields[baseName + 'txtHouseNo'] = num + fields[baseName + 'txtStreet'] = name + fields[baseName + 'btnSearchNA'] = 'Search' + response = session.post(url, data=fields) + soup = BeautifulSoup(response.text, 'lxml') + baseName = 'ctl00_ContentPlaceHolder1_GridViewA_ctl02_GridViewLocations_' + ppid = '' + name = soup.find('span', {'id': baseName + 'ctl02_lblLocation'}).string + name = name.strip() + address = soup.find('span', {'id': baseName + 'ctl02_lblPollingAddress'}).string.split(',')[0] + address = address.strip() + ' ' + address += soup.find('span', + {'id': baseName + 'ctl02_lblPollingCity'}).string + address = address.strip() + ', TX' + return ppid, name, address + + +def getDallas(firstName, lastName, dob): + url = 'http://dallas-tx.mobile.clarityelections.com/mobile/seam/resource/rest/voter/find' + payload = { + 'VOTER_ELIGIBILITY_LOOKUP_FIRST_NAME': firstName, + 'VOTER_ELIGIBILITY_LOOKUP_LAST_NAME': lastName, + 'VOTER_ELIGIBILITY_LOOKUP_BIRTH_DATE': dob + } + session = Session() + response = session.get(url, params=payload) + text = response.text.replace('\n', '') + text = re.sub('^null\\((.*)\\)$', '\\1', text) + data = json.loads(text)[0] + precinctInfo = data['precinct'] + ppid = '' + address = '' + if 'name' in precinctInfo: + ppid = precinctInfo['name'] + ppInfo = precinctInfo['defaultPollingPlace'] + name = ppInfo['name'] + addrDict = ppInfo['streetAddress'] + address = '{0} {1} {2} {3}, TX {4}'.format(addrDict['address1'], + addrDict['address2'], + 
addrDict['address3'], + addrDict['city'], + addrDict['zip']) + return ppid, name, address + + +def getEPaDC(num, predir, name, suffix, postdir, city, zipcode, url): + city = city.upper() + session = Session() + url = 'http://{0}/mobile/seam/resource/rest/precinct/'.format(url) + addrStr = '{0} {1} {2} {3} {4} {5} {6}'.format(num, predir, name, suffix, + postdir, city, zipcode) + data = { + 'PRECINCT_FINDER_ADDRESS_NUMBER': num, + 'PRECINCT_FINDER_STREET_NAME': name, + 'PRECINCT_FINDER_APARTMENT_NUMBER': '', + 'PRECINCT_FINDER_CITY': city, + 'lang': 'en' + } + response = session.get(url + 'findstreet', params=data) + addrData = json.loads(response.text) + addresses = addrData['streets'] + addrList = [] + for item in addresses: + addr = '{0} {1} {2} {3} {4} {5} {6}'.format(item['address'], + item['predir'], + item['street'], + item['type'], + item['postdir'], + item['city'], + item['zipcode']) + addrList.append((addr, item['precinct'])) + precinct = matchString(addrStr, addrList) + data = { + 'precinctId': precinct, + 'lang': 'en' + } + response = session.get(url + 'precinctdetail', params=data) + precinctData = json.loads(response.text) + ppid = '' + if 'precinctName' in precinctData: + ppid = precinctData['precinctName'] + ppInfo = precinctData['electionPrecincts'][0]['pollingPlace'] + name = ppInfo['name'] + addrDict = ppInfo['streetAddress'] + address = '{0} {1} {2}, TX {3}'.format(addrDict['address1'], + addrDict['address2'], + addrDict['city'], + addrDict['zip']) + return ppid, name, address + + +def getHidalgo(firstName, lastName, dob): + url = 'http://apps.co.hidalgo.tx.us/VoterLookup/Lookup/Results' + year = dob[6:] + session = Session() + data = {'LastName': lastName, 'FirstName': firstName, 'DOBYear': year} + response = session.post(url, data=data) + soup = BeautifulSoup(response.text) + pageDict = {} + for item in soup.find_all('div'): + label = item.find('span', {'class': 'field-label'}) + value = item.find('span', {'class': 'field-value'}) + if label is 
not None and value is not None: + for string in value.strings: + pageDict[label.get_text().strip()] = string.strip() + break + ppid = '' + name = '' + address = '' + if 'Precinct' in pageDict: + ppid = pageDict['Precinct'] + if 'Location:' in pageDict: + name = pageDict['Location:'] + if 'Address:' in pageDict: + address = pageDict['Address:'] + if 'City:' in pageDict: + address += ' ' + pageDict['City:'] + address += ' TX' + return ppid, name, address + + +def getFBC(firstName, lastName, dob): + listURL = 'http://www.fortbendcountytx.gov/index.aspx?page=1099' + formURL = 'https://progprod.co.fort-bend.tx.us/Voter/default.aspx' + session = Session() + response = session.get(listURL) + soup = BeautifulSoup(response.text) + siteDict = {} + tables = soup.find_all('table', {'id': re.compile('ctl00_listDataGrid_')}) + for table in tables: + header = table.find('td', {'class': 'facility_header_cell'}) + values = header.string.split('|') + name = values[0].strip() + link = table.find('a', {'id': re.compile('googleMapHyperLink')}) + address = ' '.join(link.strings) + location = {'name': name, 'address': address} + for i in range(1, len(values)): + siteDict[values[i].strip()] = location + response = session.get(formURL, verify=False) + formURL = response.url + soup = BeautifulSoup(response.text) + fields = getHiddenValues(soup.find('form')) + fields['voterLname'] = lastName + fields['voterFname'] = firstName + fields['voterDOB'] = dob + fields['sS'] = 'Start Search' + fields['type'] = 'voterLname' + fields['type1'] = 'voterCNumber' + fields['voterDate'] = 'null' + fields['voterCNumber'] = '' + fields['formFirstName'] = '' + fields['formLastName'] = '' + response = session.post(formURL, data=fields, verify=False) + soup = BeautifulSoup(response.text) + ppid = soup.find('span', {'id': 'Precinct'}).string.strip() + name = '' + address = '' + if ppid in siteDict: + location = siteDict[ppid] + name = location['name'] + address = location['address'] + return ppid, name, address + 
+ +def getMontgomery(firstName, lastName, dob): + ppid = '' + name = '' + address = '' + session = Session() + url = 'http://www.mctx.org/electioninfo/voterlookupresult.aspx?curLang=English' + session.get(url) + data = { + 'LNAME': lastName, + 'FNAME': firstName, + 'DOBM': dob[:2], + 'DOBD': dob[3:5], + 'DOBY': dob[6:], + 'SUBMIT1': 'Search' + } + response = session.post(url, data=data) + soup = BeautifulSoup(response.text) + table = soup.find('table', {'id': 'dgrElectionsNew'}) + link = table.find('a', {'href': re.compile('drvDirectionsNew')}) + name = table.find_all('tr')[1].find_all('td')[4].get_text() + infoURL = link.get('href').replace('..', 'http://www.mctx.org') + response = session.get(infoURL) + soup = BeautifulSoup(response.text) + location = soup.find('font', {'size': '4'}) + lList = [] + for line in location.strings: + lList.append(line.strip()) + address = '' + for i in range(1, len(lList) - 1): + if len(address) > 0: + address += ' ' + address += lList[i] + return ppid, name, address + + +def getElectionDaySite(county, lastName, firstName, dob, zipcode): + url = 'https://team1.sos.state.tx.us/voterws/viw/faces/SearchSelectionPolling.jsp' + session = Session() + response = session.get(url, verify=False) + soup = BeautifulSoup(response.text) + hidden = getHiddenValues(soup.find('form')) + data = { + 'form1:radio1': 'N', + 'form1:button1': 'Next (Siga) >', + 'com.sun.faces.VIEW': hidden['com.sun.faces.VIEW'], + 'form1': 'form1' + } + response = session.post(url, data=data, verify=False) + soup = BeautifulSoup(response.text, 'lxml') + hidden = getHiddenValues(soup.find('form')) + select = soup.find('select') + counties = [] + for option in select.find_all('option'): + counties.append((option.string.upper(), option.get('value'))) + county = matchString(county.upper(), counties) + data = { + 'form1:menu2': county, + 'form1:lastName': lastName, + 'form1:firstName': firstName, + 'form1:tdlMonth': dob[:2], + 'form1:tdlDay': dob[3:5], + 'form1:tdlYear': 
dob[6:], + 'form1:zip': zipcode, + 'form1:button1': 'Next (Siga) >', + 'com.sun.faces.VIEW': hidden['com.sun.faces.VIEW'], + 'form1': 'form1' + } + response = session.post(url, data=data, verify=False) + soup = BeautifulSoup(response.text) + ppid = soup.find('span', {'id': 'form1:format7'}).string + name = '' + address = '' + listLink = soup.find('a', {'id': 'form1:linkEx2'}) + if listLink is not None: + name = listLink.get('href') + try: + select = soup.find('select') + hidden = getHiddenValues(soup.find('form')) + elections = [] + election = '2014 NOVEMBER 4TH GENERAL ELECTION' + for option in select.find_all('option'): + elections.append((option.string.upper(), option.get('value'))) + election = matchString(election, elections) + data = { + 'form1:menu1': election, + 'form1:radio2': 'ED', + 'form1:button1': 'Next (Siga) >', + 'com.sun.faces.VIEW': hidden['com.sun.faces.VIEW'], + 'form1': 'form1' + } + url = 'https://team1.sos.state.tx.us/voterws/viw/faces/DisplayVoter.jsp' + response = session.post(url, data=data, verify=False) + soup = BeautifulSoup(response.text) + nameSpan = soup.find('span', {'id': 'form1:format10'}) + line1 = soup.find('span', {'id': 'form1:format1'}) + line2 = soup.find('span', {'id': 'form1:format4'}) + city = soup.find('span', {'id': 'form1:format2'}) + zipcode = soup.find('span', {'id': 'form1:format8'}) + if nameSpan is not None: + name = nameSpan.get_text().strip() + if line1 is not None: + address = line1.get_text().strip() + if line2 is not None: + if len(address) > 0: + address += ' ' + address += line2.get_text().strip() + if city is not None: + if len(address) > 0: + address += ' ' + address += city.get_text().strip() + if zipcode is not None: + if len(address) > 0: + address += ' ' + address += zipcode.get_text().strip() + except Exception as error: + print type(error) + print error + return ppid, name, address + + +def run(row): + num, predir, name, suffix, postdir, city, zipcode, county, dob, firstName, lastName = 
def run(row):
    """Dispatch a TargetSmart row to the matching TX county lookup.

    Counties with a scraper get their dedicated function; anything else
    falls back to the SoS site.  Returns (ppid, name, address); any
    scraping failure is logged and yields ('', '', '').
    """
    num, predir, name, suffix, postdir, city, zipcode, county, dob, \
        firstName, lastName = getValues(row)
    try:
        # Hoisted: the original re-evaluated county.upper() in every branch.
        countyName = county.upper()
        if countyName == 'BEXAR':
            pollingInfo = getBexar(num, predir, name, suffix, postdir,
                                   zipcode)
        elif countyName == 'HARRIS':
            pollingInfo = getHarris(lastName, firstName, num, name)
        elif countyName == 'DALLAS':
            pollingInfo = getDallas(firstName, lastName, dob)
        elif countyName == 'EL PASO':
            url = 'www.epcountyvotes.com/ce'
            pollingInfo = getEPaDC(num, predir, name, suffix, postdir, city,
                                   zipcode, url)
        elif countyName == 'DENTON':
            url = 'www.votedenton.com/ce'
            pollingInfo = getEPaDC(num, predir, name, suffix, postdir, city,
                                   zipcode, url)
        elif countyName == 'HIDALGO':
            pollingInfo = getHidalgo(firstName, lastName, dob)
        elif countyName == 'FORT BEND':
            pollingInfo = getFBC(firstName, lastName, dob)
        elif countyName == 'MONTGOMERY':
            pollingInfo = getMontgomery(firstName, lastName, dob)
        elif countyName == 'JEFFERSON':
            url = 'jefferson-tx.mobile.clarityelections.com'
            pollingInfo = getEPaDC(num, predir, name, suffix, postdir, city,
                                   zipcode, url)
        else:
            pollingInfo = getElectionDaySite(county, lastName, firstName,
                                             dob, zipcode)
        return pollingInfo
    except Exception as inst:
        # Best-effort per row: log and return empty polling info.
        print(type(inst))
        print(inst)
        return '', '', ''


# =========================================================================
# vip/WV.py -- West Virginia polling-place lookup (a separate new file in
# the original patch; reproduced here because this chunk concatenates it).
# =========================================================================
from bs4 import BeautifulSoup
from requests import Session
import json
import time


def getValues(row):
    """Extract (first name, last name, MM/DD/YYYY dob) from a TargetSmart
    row; voterbase_dob arrives as YYYYMMDD."""
    fname = row['tsmart_first_name']
    lname = row['tsmart_last_name']
    dobStr = row['voterbase_dob']
    dob = '{0}/{1}/{2}'.format(dobStr[4:6], dobStr[6:8], dobStr[:4])
    return fname, lname, dob


def getOutputValues(soup):
    """Pull (ppid, name, address) out of the WV results table.

    query() replaced line breaks with '***', so the third cell reads
    'site name***addr line***addr line...'.  WV never exposes a precinct
    id here, so ppid is always ''.
    """
    ppid = ''
    address = ''
    table = soup.find('table', {'id': 'tableResults'}).find_all('tr')
    row = table[2]
    values = row.find_all('td')[3].get_text().split('***')
    name = values[0].strip()
    for i in range(1, len(values)):
        if len(address) > 0:
            address += ' '
        address += values[i].strip()
    return ppid, name, address
def getHiddenValues(soup):
    """Collect the ASP.NET state fields from WV's Form1.

    Inputs inside the 'displayNone printNone' div are visually hidden but
    not type=hidden, so they are harvested separately; the postback event
    fields are reset to empty strings.
    """
    form = soup.find('form', {'name': 'Form1'})
    fields = {}
    manualhide = form.find('div', {'class': 'displayNone printNone'})
    for item in manualhide.find_all('input'):
        value = item.get('value')
        if value is not None:
            fields[item.get('name')] = value
        else:
            fields[item.get('name')] = ''
    for item in form.find_all('input', {'type': 'hidden'}):
        fields[item.get('name')] = item.get('value')
    fields['__EVENTTARGET'] = ''
    fields['__EVENTARGUMENT'] = ''
    return fields


def query(fname, lname, dob, fields, formURL, session):
    """POST the voter search and return the response HTML with em-dashes
    normalised to '-' and line breaks turned into '***' markers so
    getOutputValues() can split the address cell.
    """
    baseName = 'ctl00$MainContent$'
    fields[baseName + 'txtNameFirst'] = fname
    fields[baseName + 'txtNameLast'] = lname
    fields[baseName + 'txtDob'] = dob
    fields[baseName + 'btnSubmit'] = 'Submit'
    # (Removed leftover debug code that dumped the POST fields -- including
    # voter PII -- to a hardcoded personal path /home/michael/Desktop/.)
    response = session.post(formURL, data=fields)
    html = response.text
    # NOTE(review): the replaced token spanned a literal newline in the
    # original source; reconstructed as '\n' -- confirm against the page.
    return html.replace(u'\u2014', '-').replace('\n', '***')


def run(row):
    """Look up a WV voter's polling place.

    Returns (ppid, name, address); any failure is logged and yields
    ('', '', '').
    """
    formURL = 'https://apps.sos.wv.gov/elections/voter/find-polling-place.aspx'
    try:
        session = Session()
        fname, lname, dob = getValues(row)
        response = session.get(formURL)
        time.sleep(2)  # throttle: be polite to the SoS server
        soup = BeautifulSoup(response.text)
        hiddenFields = getHiddenValues(soup)
        html = query(fname, lname, dob, hiddenFields, formURL, session)
        soup = BeautifulSoup(html)
        pollingInfo = getOutputValues(soup)
        return pollingInfo
    except Exception as inst:
        print(type(inst))
        print(inst)
        return '', '', ''

# NOTE(review): the remainder of the original chunk was a patch hunk for
# vip/execute.py (wrapping the geocode/haversine section of VIP() in a
# try/except).  That function's definition is not visible from this chunk,
# so it is not reproduced here.