"""Chart each NBA team's record in the second game of back-to-backs.

Scrapes the 2013-2014 season pages on basketball-reference.com, counts for
every team how many games played exactly one day after the previous game
were won or lost, and shows the totals as a grouped bar chart.
"""
from datetime import datetime, timedelta

from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import numpy as np
import requests

BASE_URL = 'http://www.basketball-reference.com'
SEASON_URL = 'http://www.basketball-reference.com/leagues/NBA_2014.html'

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30


def fetch_soup(page_url):
    """Download *page_url* and return it parsed as a BeautifulSoup tree.

    Raises requests.HTTPError on a 4xx/5xx answer so that an error page is
    never parsed as if it were real data.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Name the parser explicitly: otherwise bs4 picks whichever one happens
    # to be installed (and warns about it), so results can differ by machine.
    return BeautifulSoup(response.text, 'html.parser')


def team_schedule_urls(season_url):
    """Return the schedule-page URL of every team linked from *season_url*.

    Each link inside the body of the element with id "team" has its last
    five characters (the ".html" suffix, presumably) replaced by
    "_games.html" and is prefixed with BASE_URL.
    """
    soup = fetch_soup(season_url)
    return [
        BASE_URL + link.get('href')[:-5] + '_games.html'
        for link in soup.find(id="team").tbody.find_all('a')
    ]


def count_second_night_results(schedule_url):
    """Count wins/losses in games played the day after the previous game.

    Returns a dict ``{'W': wins, 'L': losses}`` for the schedule page at
    *schedule_url*.
    """
    soup = fetch_soup(schedule_url)
    tally = {'W': 0, 'L': 0}
    one_day = timedelta(days=1)
    prev_date = None  # no previous game yet, so the first row never counts

    for row in soup.find(id="teams_games").tbody.find_all('tr'):
        # Rows without a <td> carry no game data (presumably repeated
        # header rows -- verify against the page) and are skipped.
        if not row.td:
            continue

        # The 4th child node holds the game date in its "csk" attribute.
        date = datetime.strptime(row.contents[3]['csk'], "%Y-%m-%d")
        if prev_date is not None and date - one_day == prev_date:
            # The 16th child node's text is the result key ('W' or 'L').
            # Anything else (e.g. an empty cell for a game not played yet)
            # is ignored instead of raising KeyError.
            result = row.contents[15].text
            if result in tally:
                tally[result] += 1
        prev_date = date

    return tally


def _autolabel(ax, rects):
    """Write each bar's integer height just above it on *ax*."""
    for rect in rects:
        height = rect.get_height()
        ax.text(
            rect.get_x() + rect.get_width() / 2.,
            1.05 * height,
            '%d' % int(height),
            ha='center',
            va='bottom',
        )


def plot_results(team_dict):
    """Show a grouped win/loss bar chart.

    *team_dict* maps a team name to its ``{'W': ..., 'L': ...}`` tally.
    """
    team_name_list = list(team_dict)
    win_bars = [team_dict[team]['W'] for team in team_name_list]
    lose_bars = [team_dict[team]['L'] for team in team_name_list]

    ind = np.arange(len(team_name_list))
    width = .35

    fig, ax = plt.subplots()
    fig.autofmt_xdate()  # used here only to slant the x tick labels
    fig.set_size_inches(18.5, 10.5)

    rects1 = ax.bar(ind, win_bars, width, color='g')
    rects2 = ax.bar(ind + width, lose_bars, width, color='r')

    ax.set_ylabel('Win/Loss Count')
    ax.set_xlabel('Team')
    ax.set_title(
        'Win/Loss Count of the 2nd Game in a Back to Back 2013-2014')
    ax.set_xticks(ind + width)
    ax.set_xticklabels(team_name_list)
    # An empty team_dict leaves nothing to build legend handles from.
    if team_name_list:
        ax.legend((rects1[0], rects2[0]), ('Win', 'Loss'))

    _autolabel(ax, rects1)
    _autolabel(ax, rects2)

    plt.ylim((0, 25))
    plt.show()


def main():
    """Scrape every team's schedule and plot the back-to-back tallies."""
    team_dict = {}
    for schedule_url in team_schedule_urls(SEASON_URL):
        # Fifth '/'-separated piece of the URL, i.e. the path segment after
        # "teams" (presumably the team abbreviation).
        team_name = schedule_url.split('/')[4]
        team_dict[team_name] = count_second_night_results(schedule_url)
    plot_results(team_dict)


if __name__ == "__main__":
    main()
Monday, November 3, 2014
Exploring the NBA
I've been trying to do some more data-sciencey stuff with NBA data, so I created a Python script that scrapes data from www.basketball-reference.com. This one counts every team's wins and losses in the 2nd game of back-to-backs during the 2013-2014 NBA season. Below is the graph, and after the jump is the code.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment