1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import numpy as np
import requests

def get_data(url):
    """Download a whitespace-separated data file and split it into features and labels.

    Every non-blank line is parsed as a row of floats: the last field is the
    label and the preceding fields are the features. A constant 1.0 is
    prepended to each feature row (the usual intercept column).

    Args:
        url: Address of the data file, fetched with an HTTP GET.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is an ``np.matrix`` with one row per
        sample (constant column first) and ``Y`` is a 1-D ``np.ndarray``
        holding the labels in the same order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        ValueError: If a field cannot be parsed as a float.
    """
    # A timeout keeps a stalled server from hanging the caller forever, and
    # raise_for_status stops an HTTP error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    text = response.content.decode('utf-8')

    X = []
    Y = []
    # Skipping blank lines (instead of blindly dropping the last split piece)
    # keeps the final record when the file has no trailing newline and
    # tolerates stray empty lines.
    for line in text.split("\n"):
        fields = line.split()
        if not fields:
            continue
        values = [float(field) for field in fields]
        Y.append(values[-1])
        X.append([1.0] + values[:-1])

    # np.asmatrix is the supported spelling of np.mat, which NumPy 2.0 removed;
    # the matrix type is kept so callers can still use `*` as a matrix product.
    return np.asmatrix(X), np.array(Y)

def init(X):
    """Return an all-zero column vector with one entry per column of ``X``.

    Args:
        X: Array-like object exposing ``shape``; only ``shape[1]`` is read.

    Returns:
        A float ``np.ndarray`` of shape ``(X.shape[1], 1)`` filled with zeros.
    """
    n_columns = X.shape[1]
    return np.zeros(shape=(n_columns, 1))

def sigmoid(s):
    """Evaluate the logistic function ``1 / (1 + exp(-s))`` without overflow.

    The naive formula overflows in ``exp(-s)`` once ``s`` is a large negative
    number. Here ``exp`` is only ever applied to non-positive values:

    * for ``s >= 0`` the result is ``1 / (1 + exp(-s))`` (same as the naive form);
    * for ``s < 0``  the result is ``exp(s) / (1 + exp(s))``.

    Args:
        s: Scalar, ``np.ndarray`` or ``np.matrix``; evaluated element-wise.

    Returns:
        The logistic function of ``s``, with the same shape as the input.
    """
    # exp(min(s, 0)) is 1 for s >= 0 and exp(s) otherwise;
    # exp(-|s|) is exp(-s) for s >= 0 and exp(s) otherwise.
    numerator = np.exp(np.minimum(s, 0))
    denominator = 1 + np.exp(-np.abs(s))
    return numerator / denominator


def grad(w, X, Y, i):
    """Return the single-sample gradient term for sample ``i``.

    Computes ``-Y[i] * sigmoid(-Y[i] * X[i] * w) * X[i].T``.

    NOTE(review): assumes ``X`` is an ``np.matrix`` so that ``X[i]`` is a
    1-row matrix and ``*`` with ``w`` is a matrix product yielding a single
    value; confirm against the caller.

    Args:
        w: Current weight column vector.
        X: Sample matrix, one row per sample.
        Y: Sequence of labels aligned with the rows of ``X``.
        i: Index of the sample to evaluate.

    Returns:
        The transposed sample row ``X[i].T`` scaled by
        ``-Y[i] * sigmoid(-Y[i] * X[i] * w)``.
    """
    label = Y[i]
    sample = X[i]
    margin = -label * sample * w
    # .item() unwraps the single-element result into a plain Python scalar.
    weight = sigmoid(margin).item()
    return -label * weight * sample.T

def sign(v):
    """Map a value to a +1/-1 label using 0.5 as the threshold.

    Args:
        v: Value to threshold.

    Returns:
        ``1`` when ``v >= 0.5``, otherwise ``-1``.
    """
    return 1 if v >= 0.5 else -1
def cost(w, X, Y):
    """Return the fraction of samples whose thresholded prediction differs from its label.

    ``sigmoid(X*w)`` is transposed and converted to a nested list; entry
    ``[0][i]`` is passed through ``sign`` and compared with ``Y[i]``.

    NOTE(review): indexing ``[0][i]`` presumes ``X*w`` is a single column
    (``X`` an ``np.matrix``, ``w`` a column vector); confirm against the caller.

    Args:
        w: Weight column vector.
        X: Sample matrix, one row per sample.
        Y: Sequence of labels aligned with the rows of ``X``.

    Returns:
        Number of mismatches divided by ``len(Y)``.
    """
    scores = sigmoid(X*w).T.tolist()
    total = len(Y)
    mistakes = sum(
        1
        for idx in range(total)
        if sign(scores[0][idx]) != Y[idx]
    )
    return mistakes/total

if __name__ == "__main__":
    # Fetch the training set first, then the held-out test set.
    trainX, trainY = get_data(
        "https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_algo/hw3_train.dat"
    )
    testX, testY = get_data(
        "https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_algo/hw3_test.dat"
    )

    # Start from all-zero weights and take 2000 single-sample gradient steps,
    # walking through the training samples in order and wrapping around.
    w = init(trainX)
    step_size = 0.01
    n_train = len(trainY)
    for t in range(2000):
        sample_index = t % n_train
        w = w - step_size*grad(w, trainX, trainY, sample_index)

    # Report the misclassification rate on the test set.
    print(cost(w, testX, testY))