# Imports
from PythonInterface import Python
let pathlib = Python.import_module("pathlib") # Python standard library
let gzip = Python.import_module("gzip") # Python standard library
let pickle = Python.import_module("pickle") # Python standard library
let np = Python.import_module("numpy")
# Get the data
path_gz = pathlib.Path('./lost+found/data/mnist.pkl.gz')
f = gzip.open(path_gz, 'rb')
u = pickle._Unpickler(f)
u.encoding = 'latin1'
data = u.load()
data_train = data[0]
data_valid = data[1]
x_train = data_train[0]
y_train = data_train[1]
y_train = np.expand_dims(y_train, 1)
x_valid = data_valid[0]
y_valid = data_valid[1]
y_valid = np.expand_dims(y_valid, 1)
f.close()
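Just to be clear about what was loaded: the standard mnist.pkl.gz pickle holds a (train, valid, test) tuple of (x, y) pairs. As a quick sanity check, here is the same loading in plain Python/NumPy, a sketch for reference only (the shapes in the comments assume the usual fast.ai / deeplearning.net layout of the file):

import gzip, pickle
import numpy as np

with gzip.open('./lost+found/data/mnist.pkl.gz', 'rb') as f:
    data = pickle.load(f, encoding='latin1')

x_train, y_train = data[0]
x_valid, y_valid = data[1]
y_train = np.expand_dims(y_train, 1)
y_valid = np.expand_dims(y_valid, 1)

print(x_train.shape, y_train.shape)  # expected: (50000, 784) (50000, 1)
print(x_valid.shape, y_valid.shape)  # expected: (10000, 784) (10000, 1)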
🔥 Forward and backward pass

Let us put in the code from the previous notebook, to do the imports and to load the data.
from DType import DType
from Memory import memset_zero
from Object import object, Attr
from Pointer import DTypePointer, Pointer
from Random import rand
from Range import range
from TargetInfo import dtype_sizeof
struct Matrix[type: DType]:
    var data: DTypePointer[type]
    var rows: Int
    var cols: Int

    fn __init__(inout self, rows: Int, cols: Int):
        self.data = DTypePointer[type].alloc(rows * cols)
        rand(self.data, rows * cols)
        self.rows = rows
        self.cols = cols

    fn __copyinit__(inout self, other: Self):
        self.data = other.data
        self.rows = other.rows
        self.cols = other.cols

    fn __del__(owned self):
        self.data.free()

    fn zero(inout self):
        memset_zero(self.data, self.rows * self.cols)

    @always_inline
    fn __getitem__(self, y: Int, x: Int) -> SIMD[type, 1]:
        return self.load[1](y, x)

    @always_inline
    fn load[nelts: Int](self, y: Int, x: Int) -> SIMD[type, nelts]:
        return self.data.simd_load[nelts](y * self.cols + x)

    @always_inline
    fn __setitem__(self, y: Int, x: Int, val: SIMD[type, 1]):
        return self.store[1](y, x, val)

    @always_inline
    fn store[nelts: Int](self, y: Int, x: Int, val: SIMD[type, nelts]):
        self.data.simd_store[nelts](y * self.cols + x, val)
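One detail worth spelling out: the struct keeps all elements in a single flat, row-major buffer, so load[nelts](y, x) reads nelts contiguous values starting at offset y * cols + x, and store writes them back at the same offset. A tiny NumPy sketch of that addressing (the names flat and load_row_major are just for illustration, not part of the notebook):

import numpy as np

rows, cols, nelts = 4, 6, 2
flat = np.arange(rows * cols, dtype=np.float32)  # flat row-major buffer, like Matrix.data

def load_row_major(y, x):
    # same offset arithmetic as Matrix.load[nelts](y, x)
    start = y * cols + x
    return flat[start:start + nelts]

print(load_row_major(1, 2))  # elements (1,2) and (1,3) of the 4x6 matrix: [8. 9.]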
fn matrix_dataloader[type: DType](a: PythonObject, o: Matrix[type], bs: Int) raises -> Matrix[type]:
    for i in range(bs):
        for j in range(o.cols):
            o[i,j] = a[i][j].to_float64().cast[type]()
    return o
let bs: Int = 5 # batch-size
let ni: Int = 28*28

let xb: Matrix[DType.float32] = Matrix[DType.float32](bs, ni)
let yb: Matrix[DType.float32] = Matrix[DType.float32](bs, 1)
xb.zero()
yb.zero()
xb = matrix_dataloader(x_train, xb, bs)
yb = matrix_dataloader(y_train, yb, bs)
let no: Int = 10
var w: Matrix[DType.float32] = Matrix[DType.float32](ni, no) # weights
var b: Matrix[DType.float32] = Matrix[DType.float32](no, 1) # bias
b.zero()

var res = Matrix[DType.float32](xb.rows, w.cols) # result
res.zero()
from TargetInfo import dtype_sizeof, dtype_simd_width
from Functional import vectorize
alias nelts = dtype_simd_width[DType.float32]() # The SIMD vector width.
fn lin_vectorized[type: DType](res: Matrix[type], xb: Matrix[type], w: Matrix[type], b: Matrix[type]) raises -> Matrix[type]:
    for i in range(xb.rows): # 50000
        for j in range(xb.cols): # 784
            @parameter
            fn dotbias[nelts: Int](k: Int):
                res.store[nelts](i, k, res.load[nelts](i, k) + xb[i,j] * w.load[nelts](j, k) + b.load[nelts](k, 0))
            vectorize[nelts, dotbias](w.cols)
    return res

res.zero()
res = lin_vectorized(res, xb, w, b)
print(res.rows)
print(res.cols)
5
10
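So res comes out as (bs, no) = (5, 10), as expected. For reference, the whole linear layer is a single expression in NumPy. One caveat: if I read the Mojo loop correctly, it adds the bias term inside the j loop, so the bias is accumulated xb.cols times rather than once; the usual formulation, sketched below with made-up stand-in names, adds it a single time:

import numpy as np

rng = np.random.default_rng(0)
xb_np = rng.random((5, 784), dtype=np.float32)  # stand-in batch, like xb
w_np = rng.random((784, 10), dtype=np.float32)  # stand-in weights, like w
b_np = np.zeros((10, 1), dtype=np.float32)      # stand-in bias, like b (zeroed)

res_np = xb_np @ w_np + b_np.T                  # (5,784) @ (784,10) + (1,10) -> (5,10)
print(res_np.shape)                             # (5, 10)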
Foundations
ReLU from the foundations
from Math import max
fn relu_layer[type: DType](acts: Matrix[type], out: Matrix[type]) -> Matrix[type]:
    @parameter
    fn relu[nelts: Int](k: Int):
        #let l = SIMD[type, nelts](0)
        out.store[nelts](k, 0, max[type, nelts](acts.load[nelts](k, 0), 0.0))
    vectorize[nelts, relu](acts.rows)
    return out
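In NumPy terms, the whole layer is just an element-wise maximum with zero; a one-line reference sketch (relu_np is a made-up name, not part of the notebook):

import numpy as np

def relu_np(acts):
    # element-wise max(x, 0), the same idea as relu_layer applied to a column vector
    return np.maximum(acts, 0.0)

print(relu_np(np.array([[0.766], [0.219], [0.115], [-0.333]], dtype=np.float32)))
# the negative entry becomes 0.0, the rest pass through unchanged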
Let's test that with an example. First, a little print function for convenience.
fn print_matrix[type: DType](mat: Matrix[type]):
    for row in range(mat.rows):
        for col in range(mat.cols):
            print(mat[row, col])
And now, some dummy data.
# These are like the activations
var x = Matrix[DType.float32](4, 1)
x[3,0] = -0.333 # Intentionally have a negative value here
print_matrix[DType.float32](x)
0.76618862152099609
0.21974173188209534
0.11567939817905426
-0.33300000429153442
var res = Matrix[DType.float32](x.rows, x.cols) # Matrix to hold the result
res.zero()
print_matrix[DType.float32](res)
0.0
0.0
0.0
0.0
So, the result has been initialized and zeroed out nicely. Let's see what it shows after calling the ReLU function.
var o = relu_layer[DType.float32](x, res)
print_matrix(o)
0.76618862152099609
0.21974173188209534
0.11567939817905426
0.0
Looks good. The negative value is zeroed out, while the positive values are the same as in the input matrix.
Next up: The loss function.
Loss function from the foundations: MSE
from Math import pow
fn loss[type: DType](y_pred: Matrix[type], y_true: Matrix[type], res: Matrix[type]):
    @parameter
    fn mse[nelts: Int](k: Int):
        let sum_of_squares = pow[type, nelts](y_pred.load[nelts](k, 0) - y_true.load[nelts](k, 0), 2).reduce_add()
        res.store[1](0, 0, res.load[1](0, 0) + sum_of_squares[0])
    vectorize[nelts, mse](y_pred.rows)
    res.store[1](0, 0, res.load[1](0, 0) / y_pred.rows)
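In other words: sum the squared differences over the rows, then divide by the number of rows, accumulating the result into the 1x1 matrix res. The NumPy reference it should agree with (mse_np is a made-up name, for illustration only):

import numpy as np

def mse_np(y_pred, y_true):
    # mean squared error, the quantity loss() should leave in res[0, 0]
    return np.mean((y_pred - y_true) ** 2)

print(mse_np(np.zeros((8, 1), dtype=np.float32),
             np.zeros((8, 1), dtype=np.float32)))  # 0.0, like the first test below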
Let us test that with a couple of examples.
First, if both inputs are zeros, then the loss should be zero.
var y1 = Matrix[DType.float32](8, 1)
y1.zero()
var y2 = Matrix[DType.float32](8, 1)
y2.zero()
var l = Matrix[DType.float32](1, 1)
l.zero()
print(l[0,0])
0.0
loss[DType.float32](y1, y2, l)
print(l[0,0])
0.0
That looks fine.
Now if we have two random matrices, we would expect the loss to be non-zero.
var y1 = Matrix[DType.float32](8, 1)
var y2 = Matrix[DType.float32](8, 1)
var l = Matrix[DType.float32](1, 1)
l.zero()
print(l[0,0])
0.0
loss[DType.float32](y1, y2, l)
print(l[0,0])
0.21140147745609283
And if the two vectors are equal, we should get zero loss again.
l.zero()
loss[DType.float32](y1, y1, l)
print(l[0,0])
0.0