I am trying to perform gradient descent (from scratch) to compute the MLEs of three parameters, $A$, $\nu_0$ and $\alpha$. My data are $x_i = s_i + w_i$, where the model is $s_i = A(\nu_i/\nu_0)^{\alpha}(1+\nu_i/\nu_0)^{-4\alpha}$, and I have calculated the first derivatives of the log-likelihood analytically:
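To be explicit about where these derivatives come from: the noise $w_i$ is Gaussian in my setup (that is how I generate the synthetic data below), so up to an additive constant the log-likelihood is

$$\ln L = -\frac{1}{2\sigma^2}\sum_i \left(x_i - s_i\right)^2, \qquad \frac{\partial \ln L}{\partial A} = \frac{1}{\sigma^2}\sum_i \left(x_i - s_i\right)\left(\frac{\nu_i}{\nu_0}\right)^{\alpha}\left(1+\frac{\nu_i}{\nu_0}\right)^{-4\alpha},$$

and similarly for $\alpha$ and $\nu_0$. I drop the $1/\sigma^2$ factor in the code since it only rescales the gradient.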
import numpy as np
import matplotlib.pyplot as plt

# model function s_i
def signal(A, nu_0, alpha, nu):
    return A*(nu/nu_0)**alpha*(1+nu/nu_0)**(-4*alpha)

# partial derivatives of the log-likelihood function
def MLE_A(A, nu_0, alpha, nu, x_i):
    nu = np.array(nu)
    x_i = np.array(x_i)
    return -np.sum(((nu/nu_0)**alpha*((A*(nu/nu_0)**alpha)/(nu/nu_0+1)**(4*alpha)-x_i))/(nu/nu_0+1)**(4*alpha))

def MLE_alpha(A, nu_0, alpha, nu, x_i):
    nu = np.array(nu)
    x_i = np.array(x_i)
    return -np.sum((A*(nu/nu_0)**alpha*(4*np.log(nu/nu_0+1)-np.log(nu/nu_0))*(x_i*(nu/nu_0+1)**(4*alpha)-A*(nu/nu_0)**alpha))/(nu/nu_0+1)**(8*alpha))

def MLE_nu_0(A, nu_0, alpha, nu, x_i):
    nu = np.array(nu)
    x_i = np.array(x_i)
    return -np.sum((A*alpha*(nu/nu_0)**alpha*(nu_0-3*nu)*((x_i*(nu/nu_0+1)**(4*alpha))-A*(nu/nu_0)**alpha))/(nu_0*(nu+nu_0)*(nu/nu_0+1)**(8*alpha)))
def gradient_descent(A_init, nu_0_init, alpha_init, nu, x_i, iterations=1000, learning_rate=0.01):
    A = A_init
    nu_0 = nu_0_init
    alpha = alpha_init
    theta = np.array([A_init, nu_0_init, alpha_init])
    updated_theta = [theta]
    for i in range(iterations):
        new_theta = theta - learning_rate*np.array([MLE_A(A, nu_0, alpha, nu, x_i),
                                                    MLE_nu_0(A, nu_0, alpha, nu, x_i),
                                                    MLE_alpha(A, nu_0, alpha, nu, x_i)])
        theta = new_theta
        updated_theta.append(theta)
        A, nu_0, alpha = new_theta[0], new_theta[1], new_theta[2]
    return updated_theta
# true parameter values (also used as the starting point for the descent)
A = 6
nu_0 = 2
alpha = 1

# synthetic data: model plus Gaussian noise
nu = np.linspace(0.05, 1.0, 200)
x_i = signal(A, nu_0, alpha, nu) + np.random.normal(0, 0.05, len(nu))

# run the descent and take the last parameter vector
params = gradient_descent(A, nu_0, alpha, nu, x_i, iterations=10000, learning_rate=0.01)
print(params[-1])
A_fit = params[-1][0]
nu_0_fit = params[-1][1]
alpha_fit = params[-1][2]

# compare the noisy data with the fitted model
plt.plot(nu, x_i)
plt.plot(nu, signal(A_fit, nu_0_fit, alpha_fit, nu))
plt.show()
Sometimes I get errors like `RuntimeWarning: overflow encountered in power` and `RuntimeWarning: invalid value encountered in true_divide`, and sometimes the fitted values are wildly off. Trying different values for the learning rate did not fix it. I have checked the derivative functions by hand and with symbolic software, and I used latexify to confirm that I typed them in correctly, so I am assuming it is my implementation of the gradient descent that is somehow off.
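For what it is worth, this is the kind of finite-difference check I would add on top of the symbolic verification: it compares the three analytic derivative functions against central differences of the log-likelihood $-\frac{1}{2}\sum_i (x_i - s_i)^2$ (with the $1/\sigma^2$ factor dropped), using the `signal`/`MLE_*` functions and the `nu`, `x_i` arrays from the script above. The step size `eps` and the test point are arbitrary choices; if the derivatives are implemented correctly, the two printed rows should agree closely.

import numpy as np

# log-likelihood up to a constant (1/sigma^2 factor dropped)
def loglike(A, nu_0, alpha, nu, x_i):
    return -0.5*np.sum((x_i - signal(A, nu_0, alpha, nu))**2)

def check_gradients(A, nu_0, alpha, nu, x_i, eps=1e-6):
    # analytic derivatives, in the same order as theta = [A, nu_0, alpha]
    analytic = np.array([MLE_A(A, nu_0, alpha, nu, x_i),
                         MLE_nu_0(A, nu_0, alpha, nu, x_i),
                         MLE_alpha(A, nu_0, alpha, nu, x_i)])
    theta = np.array([A, nu_0, alpha], dtype=float)
    numeric = np.zeros(3)
    for k in range(3):
        # central difference in the k-th parameter
        step = np.zeros(3)
        step[k] = eps
        up = theta + step
        down = theta - step
        numeric[k] = (loglike(up[0], up[1], up[2], nu, x_i)
                      - loglike(down[0], down[1], down[2], nu, x_i))/(2*eps)
    print("analytic:", analytic)
    print("numeric: ", numeric)

check_gradients(6, 2, 1, nu, x_i)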
Any help is HUGELY appreciated.