1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
| import numpy as np import requests
def get_data(url, *, timeout=30.0):
    """Download a whitespace-separated numeric dataset and parse it.

    Every non-blank line is split on whitespace; the last field is taken
    as the target value and the remaining fields as features.  A constant
    ``1.0`` is prepended to each feature row (bias / intercept term).

    Args:
        url: Location of the text file to download.
        timeout: Seconds to wait for the server before giving up
            (forwarded to ``requests.get``).

    Returns:
        A tuple ``(X, Y)`` where ``X`` is an ``np.matrix`` with one row per
        sample (leading column of ones) and ``Y`` is a 1-D ``np.ndarray``
        holding the targets.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If a field cannot be converted to ``float``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')

    X = []
    Y = []
    for line in text.splitlines():
        fields = line.split()
        if not fields:
            # Skip blank lines (including a trailing one) rather than
            # blindly discarding the last line of the file.
            continue
        Y.append(float(fields[-1]))
        X.append([1.0] + [float(value) for value in fields[:-1]])

    # ``np.mat`` was removed in NumPy 2.0; ``np.asmatrix`` is the
    # equivalent spelling available in every NumPy version.
    return np.asmatrix(X), np.array(Y)
def init(X):
    """Return an all-zero column vector with one entry per column of ``X``.

    Args:
        X: Any object exposing a 2-D ``shape``; only ``X.shape[1]`` is read.

    Returns:
        ``np.ndarray`` of shape ``(X.shape[1], 1)`` filled with zeros.
    """
    n_columns = X.shape[1]
    return np.zeros((n_columns, 1))
def sigmoid(s):
    """Evaluate the logistic function ``1 / (1 + exp(-s))`` element-wise.

    Args:
        s: A scalar or NumPy array-like value.

    Returns:
        The logistic function applied to ``s``.
    """
    exp_neg = np.exp(-s)
    return 1 / (1 + exp_neg)
def grad(w, X, Y, i):
    """Compute the single-sample gradient used by the SGD update.

    Evaluates ``-Y[i] * sigmoid(-Y[i] * X[i] * w) * X[i].T``.

    Args:
        w: Current weight column vector.
        X: Sample matrix; assumed to be an ``np.matrix`` (as returned by
            ``get_data``) so that ``*`` is a matrix product.
        Y: Sequence of targets, indexed by ``i``.
        i: Index of the sample to differentiate at.

    Returns:
        The gradient as a column (``X[i]`` transposed and scaled).
    """
    label = Y[i]
    row = X[i]
    # Matrix product of the label-scaled row with w; ``.item()`` below
    # pulls the single resulting value out as a plain scalar.
    margin = -label * row * w
    weight = sigmoid(margin).item()
    return -label * weight * row.T
def sign(v):
    """Map a value to a class label: ``1`` when ``v >= 0.5``, else ``-1``."""
    return 1 if v >= 0.5 else -1


def cost(w, X, Y):
    """Return the fraction of samples whose predicted label differs from ``Y``.

    Predictions are ``sigmoid(X * w)`` thresholded by ``sign`` (cut-off 0.5)
    and compared for equality with the corresponding entry of ``Y``.

    Args:
        w: Weight column vector.
        X: Sample matrix; assumed to be an ``np.matrix`` so ``X * w`` is a
            matrix product yielding one value per row.
        Y: Sequence of true labels.

    Returns:
        Number of mismatches divided by ``len(Y)``.
    """
    # Transposing gives a single row; take it as a flat Python list.
    scores = sigmoid(X * w).T.tolist()[0]
    mistakes = sum(
        1 for idx, label in enumerate(Y) if sign(scores[idx]) != label
    )
    return mistakes / len(Y)
if __name__ == "__main__":
    # Download the training and test splits.
    train_url = "https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_algo/hw3_train.dat"
    test_url = "https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_algo/hw3_test.dat"
    trainX, trainY = get_data(train_url)
    testX, testY = get_data(test_url)

    learning_rate = 0.01
    n_updates = 2000
    n_train = len(trainY)

    # Stochastic gradient descent: one sample per update, visiting the
    # training samples in order and wrapping around at the end.
    w = init(trainX)
    for step in range(n_updates):
        sample = step % n_train
        w = w - learning_rate * grad(w, trainX, trainY, sample)

    # Report the misclassification rate on the test split.
    print(cost(w, testX, testY))
|