DigitRecognitionPython/SimpleNN2.py at master · sn2234/DigitRecognitionPython · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import numpy as np
import math as m
from scipy.special import expit
from scipy.optimize import minimize
import pickle

def sigmoidGradient(x):
    t = expit(x)
    return t*(1-t)

xlog = np.vectorize(lambda x: m.log(1e-17) if x == 0 else m.log(x))

class NeuralNetConfig:
    inputSize = 0
    hiddenSize = 0
    outputSize = 0

    def __init__(self, inputSize, hiddenSize, outputSize):
        self.inputSize = inputSize
        self.hiddenSize = hiddenSize
        self.outputSize = outputSize

def prepareLabels(y, numLabels):
    numberOfSamples = y.shape[0]
    yy = np.zeros((numberOfSamples, numLabels))

    for i in range(y.shape[0]):
        yy[i, int(y[i])] = 1

    return yy

def initRandomThetas(nn):
    epsilon_init = 0.12
    th1 = np.random.random((nn.hiddenSize, nn.inputSize + 1))*2*epsilon_init - epsilon_init
    th2 = np.random.random((nn.outputSize, nn.hiddenSize + 1))*2*epsilon_init - epsilon_init

    return (th1, th2)

def combineThetas(th1, th2):
    return np.concatenate((
            th1.flatten(),
            th2.flatten()
        ))

def splitThetas(nn, comb):
    th1_len = nn.hiddenSize * (nn.inputSize + 1)
    #th2_len = nn.outputSize * (nn.hiddenSize +1)

    th1 = comb[:th1_len].reshape((nn.hiddenSize, nn.inputSize + 1))
    th2 = comb[th1_len:].reshape((nn.outputSize, nn.hiddenSize + 1))

    return th1, th2

# L - number of samples
# ni - input layer size
# nh - hidden layer size
# no - output layer size (equals to number of labels)
# x => (L * ni)
# y => (L * 1)
# yy => (L * no)
# th1 => (nh * (ni+1))
# th2 => (no * (nh+1))

def forwardPropagation(nn, th1, th2, x):
    numberOfSamples = x.shape[0]

    ones = np.ones((numberOfSamples, 1)) # (L * 1)

    a1 = np.hstack((ones, x)) # (L * (ni+1))

    z2 = (th1 @ a1.T).T # (L * (nh))

    a2 = np.hstack((ones, expit(z2))) # (L * (nh+1))

    z3 = (th2 @ a2.T).T # (L * no)

    a3 = expit(z3) # (L * no)

    return a1, z2, a2, z3, a3

def computeCost(nn, th1, th2, x, y, reg_lambda):
    numberOfSamples = x.shape[0]

    yy = prepareLabels(y, nn.outputSize)

    a1, z2, a2, z3, a3 = forwardPropagation(nn, th1, th2, x)

    costPositive = (-yy) * xlog(a3)
    costNegative = (1 - yy) * xlog(1 - a3)

    reg = (reg_lambda/(2*numberOfSamples)) * (np.sum((th1[:, 1:])**2) + np.sum((th2[:, 1:])**2))

    cost = np.sum(costPositive - costNegative)/numberOfSamples + reg

    return cost

def computeGrad(nn, th1, th2, x, y, reg_lambda):
    numberOfSamples = x.shape[0]

    yy = prepareLabels(y, nn.outputSize)
    ones = np.ones((numberOfSamples, 1)) # (L * 1)

    a1, z2, a2, z3, a3 = forwardPropagation(nn, th1, th2, x)

    delta3 = a3 - yy # (L * no)
    delta2 = (th2.T @ delta3.T).T * np.hstack((ones, sigmoidGradient(z2))) # (L * (nh+1))
    delta2 = delta2[:, 1:] # (L * (nh))

    theta1_reg = th1.copy()
    theta1_reg[:, 0] = np.zeros((th1.shape[0]))

    theta2_reg = th2.copy()
    theta2_reg[:, 0] = np.zeros((th2.shape[0]))

    thetaGrad1 = (delta2.T @ a1)/numberOfSamples + (reg_lambda/numberOfSamples) * theta1_reg
    thetaGrad2 = (delta3.T @ a2)/numberOfSamples + (reg_lambda/numberOfSamples) * theta2_reg

    return thetaGrad1, thetaGrad2

def predictProbability(nn, th1, th2, x):
    numberOfSamples = x.shape[0]

    _, _, _, _, a3 = forwardPropagation(nn, th1, th2, x.reshape((1, numberOfSamples)))

    return a3

def predictClass(nn, th1, th2, x):
    probabilities = predictProbability(nn, th1, th2, x).flatten()

    pmax = 0.0
    imax = 0
    for i in range(len(probabilities)):
        if probabilities[i] > pmax:
            pmax = probabilities[i]
            imax = i

    return imax

def computeCostComb(nn, thComb, x, y, reg_lambda):
    th1, th2 = splitThetas(nn, thComb)
    return computeCost(nn, th1, th2, x, y, reg_lambda)

def computeGradComb(nn, thComb, x, y, reg_lambda):
    th1, th2 = splitThetas(nn, thComb)
    th1p, th2p = computeGrad(nn, th1, th2, x, y, reg_lambda)
    return combineThetas(th1p, th2p)

def saveNetwork(nn, th1, th2, filePath):
    with open(filePath, "wb") as f:
        pickle.dump((nn, th1, th2), f)

def loadNetwork(filePath):
    with open(filePath, "rb") as f:
        (nn, th1, th2) = pickle.load(f)
        return (nn, th1, th2)