import gzip
from collections import defaultdict
from random import random

def readGz(f):
  for l in gzip.open(f):
    yield eval(l)

### Helpfulness baseline: similar to the above. Compute the global average helpfulness rate, and the average helpfulness rate for each user

nUser = defaultdict(int)
nItem = defaultdict(int)

ratings = []
for l in readGz("train.json.gz"):
  ratings.append((l['reviewerID'], l['itemID'], l['rating']))
  nUser[l['reviewerID']] += 1
  nItem[l['itemID']] += 1

nTrain = len(ratings)

lamb = 1.0
alpha = 0
userBeta = defaultdict(float)
itemBeta = defaultdict(float)

objective = None

while (True):
  #update alpha
  alpha = 0
  for user, item, rating in ratings:
    alpha += (rating - (userBeta[user] + itemBeta[item])) / nTrain
  #update user terms
  userBeta = defaultdict(float)
  for user, item, rating in ratings:
    userBeta[user] += (rating - (alpha + itemBeta[item])) / (lamb + nUser[user])
  #update item terms
  itemBeta = defaultdict(float)
  for user, item, rating in ratings:
    itemBeta[item] += (rating - (alpha + userBeta[user])) / (lamb + nItem[item])
  oldObjective = objective
  objective = 0
  for user, item, rating in ratings:
    objective += (rating - (alpha + userBeta[user] + itemBeta[item]))**2
  for u in userBeta:
    objective += lamb * userBeta[u]**2
  for i in itemBeta:
    objective += lamb * itemBeta[i]**2 
  print objective
  if (oldObjective and oldObjective - objective < 0.0000001*objective):
    break

### Gradient descent

alpha = 0

for user in userBeta:
  userBeta[user] = 0

for item in itemBeta:
  itemBeta[item] = 0

objective = None

stepsize = 0.000001

while (True):
  dalpha = 0
  duserBeta = defaultdict(float)
  ditemBeta = defaultdict(float)
  for user, item, rating in ratings:
    residual = alpha + userBeta[user] + itemBeta[item] - rating
    dalpha += 2*residual
    duserBeta[user] += 2*residual
    ditemBeta[item] += 2*residual
  for user in userBeta:
    duserBeta[user] += 2*lamb*userBeta[user]
  for item in itemBeta:
    ditemBeta[item] += 2*lamb*itemBeta[item]
  alpha -= stepsize*dalpha
  for user in userBeta:
    userBeta[user] -= stepsize*duserBeta[user]
  for item in itemBeta:
    itemBeta[item] -= stepsize*ditemBeta[item]
  oldObjective = objective
  objective = 0
  for user, item, rating in ratings:
    objective += (rating - (alpha + userBeta[user] + itemBeta[item]))**2
  for u in userBeta:
    objective += lamb * userBeta[u]**2
  for i in itemBeta:
    objective += lamb * itemBeta[i]**2 
  print objective
  if (oldObjective and oldObjective - objective < 0.0000001*objective):
    break
