Skip to main content

NBA h-model: code & data

The data and code below were used to determine the h-values shown in our NBA Christmas 2014 post.

Data file here. Code follows.

#NBA mean-score-potential model, a.k.a. the h-model. 12/31/2014

import math
import numpy as np


# Path of the comma-separated game-results file read by getnums().
dataFile = '2014.txt'

# Team names, one per line. A team's position in this list is its numeric ID
# (getresults() looks names up with teams.index), so the order must not change.
teams = [
    'Atlanta Hawks',
    'Boston Celtics',
    'Brooklyn Nets',
    'Charlotte Hornets',
    'Chicago Bulls',
    'Cleveland Cavaliers',
    'Dallas Mavericks',
    'Denver Nuggets',
    'Detroit Pistons',
    'Golden State Warriors',
    'Houston Rockets',
    'Indiana Pacers',
    'Los Angeles Clippers',
    'Los Angeles Lakers',
    'Memphis Grizzlies',
    'Miami Heat',
    'Milwaukee Bucks',
    'Minnesota Timberwolves',
    'New Orleans Pelicans',
    'New York Knicks',
    'Oklahoma City Thunder',
    'Orlando Magic',
    'Philadelphia 76ers',
    'Phoenix Suns',
    'Portland Trail Blazers',
    'Sacramento Kings',
    'San Antonio Spurs',
    'Toronto Raptors',
    'Utah Jazz',
    'Washington Wizards',
]


#Returns data for game when fed a line from the data file.
def getresults(line):
    """Extract one game's result from an already-split data-file record.

    Parameters
    ----------
    line : sequence of str
        Fields of one record. Field 0 is the date, fields 2 and 3 are the
        away team's name and score, fields 4 and 5 are the home team's name
        and score. Other fields are ignored.

    Returns
    -------
    tuple
        ``(homeID, awayID, date, y)`` where the IDs are positions in the
        module-level ``teams`` list, ``date`` is field 0 unchanged, and ``y``
        is the home score minus the away score.

    Raises
    ------
    IndexError
        If the record has fewer than six fields.
    ValueError
        If a team name is not in ``teams`` or a score is not an integer.
    """
    # Team names are matched exactly against ``teams``; strip padding and
    # line endings left over from splitting on ',' so that they do not defeat
    # the lookup. (int() already ignores surrounding whitespace.)
    homeID = teams.index(line[4].strip())
    awayID = teams.index(line[2].strip())

    # Positive y means the home team won by y points.
    y = int(line[5]) - int(line[3])
    date = line[0]

    return homeID, awayID, date, y

#Tally prior matches and score differences; normalize
def getnums(file):
    """Build per-team average score differences and the opponent-share matrix.

    Parameters
    ----------
    file : str
        Path of the comma-separated results file; each non-blank line is
        split on ',' and passed to ``getresults``.

    Returns
    -------
    tuple
        ``(scorediff, play)``. ``scorediff[i]`` is team i's total point
        differential divided by its number of games. ``play[i][j]`` is the
        fraction of team i's games that were played against team j, so every
        row for a team with at least one game sums to 1. Teams with no games
        get 0.0 everywhere instead of triggering a division by zero.
    """
    n_teams = 30
    play = [[0] * n_teams for _ in range(n_teams)]
    scorediff = [0] * n_teams

    # The context manager closes the file even if a record fails to parse.
    with open(file, 'r') as handle:
        for line in handle:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            homeID, awayID, _, y = getresults(line.split(','))
            # y is home minus away, so the away team gets the opposite sign.
            scorediff[homeID] += y
            scorediff[awayID] -= y
            play[homeID][awayID] += 1
            play[awayID][homeID] += 1

    for i in range(n_teams):
        games = float(sum(play[i]))
        if games == 0:
            # No games recorded for this team: nothing to normalize.
            play[i] = [0.0] * n_teams
            scorediff[i] = 0.0
            continue
        play[i] = [count / games for count in play[i]]
        scorediff[i] = scorediff[i] / games

    return scorediff, play


#Singular matrix equation: M h = x.
# Every row of `play` that getnums() normalizes sums to 1, so the rows of
# M = I - play sum to 0 and M is singular: h is only determined up to an
# additive constant.
scorediff, play = getnums(dataFile)
x = np.transpose(scorediff)
M = np.diag([1.0 for i in range(30)]) - np.array(play)
# np.linalg.solve requires a full-rank matrix; on this singular M it either
# raises LinAlgError or returns a round-off-dominated answer. The
# pseudo-inverse gives the minimum-norm solution of the system instead.
hvals = np.dot(np.linalg.pinv(M), x)
meanh = sum(hvals)/30.0
#subtract out meanh for aesthetics (fixes the free additive constant so the
#h-values average to zero)
hvals = [hvals[i] - meanh for i in range(30)]