🔥 Dataset struct

from PythonInterface import Python

let pathlib = Python.import_module("pathlib") # Python standard library
let gzip = Python.import_module("gzip") # Python standard library
let pickle = Python.import_module("pickle") # Python standard library
let np = Python.import_module("numpy")
# Load the gzipped MNIST pickle and split it into training / validation arrays.
path_gz = pathlib.Path('./lost+found/data/mnist.pkl.gz')
f = gzip.open(path_gz, 'rb')
# NOTE(security): unpickling can execute arbitrary code embedded in the file;
# only load archives from a trusted source.
# The unpickler is built by hand so `encoding` can be set as an attribute
# (presumably because keyword arguments are not passed through the Python
# interop here -- TODO confirm).
u = pickle._Unpickler(f)
u.encoding = 'latin1'
data = u.load()
# Everything is in memory once load() returns, so release the file handle now
# instead of holding it open across the array manipulation below.
f.close()

# data[0] is used as the training split, data[1] as the validation split.
data_train = data[0]
data_valid = data[1]

# Each split is indexed as (inputs, labels).
x_train = data_train[0]
y_train = data_train[1]
# Insert an axis at position 1 so labels can be indexed as y[i][0].
y_train = np.expand_dims(y_train, 1)

x_valid = data_valid[0]
y_valid = data_valid[1]
y_valid = np.expand_dims(y_valid, 1)
from DType import DType
from Memory import memset_zero
from Object import object, Attr
from Pointer import DTypePointer, Pointer
from Random import rand
from Range import range
from TargetInfo import dtype_sizeof

struct Matrix[type: DType]:
    """Heap-allocated, row-major 2-D buffer of `type` elements.

    Element (y, x) lives at flat offset `y * cols + x`. Every instance owns its
    own buffer: copying duplicates the storage, moving transfers it, and the
    destructor frees it. No bounds checking is performed on element access.
    """
    var data: DTypePointer[type]
    var rows: Int
    var cols: Int

    fn __init__(inout self, rows: Int, cols: Int):
        """Allocates a `rows` x `cols` buffer and fills it using `rand`."""
        self.data = DTypePointer[type].alloc(rows * cols)
        rand(self.data, rows*cols)
        self.rows = rows
        self.cols = cols

    fn __copyinit__(inout self, other: Self):
        """Deep-copies `other` into a freshly allocated buffer.

        Sharing `other.data` would make both instances free the same pointer
        in `__del__` (double free / use-after-free), so the elements are
        duplicated instead.
        """
        let count = other.rows * other.cols
        self.data = DTypePointer[type].alloc(count)
        for i in range(count):
            self.data.simd_store[1](i, other.data.simd_load[1](i))
        self.rows = other.rows
        self.cols = other.cols

    fn __moveinit__(inout self, owned existing: Self):
        """Takes over `existing`'s buffer without copying any elements."""
        self.data = existing.data
        self.rows = existing.rows
        self.cols = existing.cols

    fn __del__(owned self):
        """Frees the buffer owned by this instance."""
        self.data.free()

    fn zero(inout self):
        """Sets all `rows * cols` elements to zero."""
        memset_zero(self.data, self.rows * self.cols)

    @always_inline
    fn __getitem__(self, y: Int, x: Int) -> SIMD[type, 1]:
        """Returns the single element at row `y`, column `x`."""
        return self.load[1](y, x)

    @always_inline
    fn load[nelts:Int](self, y: Int, x: Int) -> SIMD[type, nelts]:
        """Loads `nelts` consecutive elements starting at (y, x)."""
        return self.data.simd_load[nelts](y * self.cols + x)

    @always_inline
    fn __setitem__(self, y: Int, x: Int, val: SIMD[type, 1]):
        """Stores the single element `val` at row `y`, column `x`."""
        return self.store[1](y, x, val)

    @always_inline
    fn store[nelts:Int](self, y: Int, x: Int, val: SIMD[type, nelts]):
        """Stores `nelts` consecutive elements of `val` starting at (y, x)."""
        self.data.simd_store[nelts](y * self.cols + x, val)
@value
struct Dataset[type: DType, n_feats: Int]:
    """Indexable view over a pair of Python array objects (inputs and labels).

    `np_x` is indexed as `np_x[i][j]` for `j` in `0..n_feats`, and `np_y` as
    `np_y[i][0]`. One sample at a time is converted into the reusable
    1 x n_feats buffer `x` and the 1 x 1 buffer `y`.
    """
    var np_x: PythonObject
    var np_y: PythonObject
    var x: Matrix[type] 
    var y: Matrix[type]
    var len: Int

    fn __init__(inout self, np_x:PythonObject, np_y:PythonObject) raises:
        """Stores the source arrays and allocates zeroed one-sample buffers.

        The length is taken from `np_x.shape[0]`.
        """
        self.np_x = np_x
        self.np_y = np_y
        self.x = Matrix[type](1,n_feats)
        self.x.zero()
        self.y = Matrix[type](1,1)
        self.y.zero()
        self.len = np_x.shape[0].__index__()

    fn __len__(self) -> Int:
        """Returns the number of samples recorded at construction."""
        return self.len

    fn __getitem__(self, i: Int) raises -> Tuple[Matrix[type], Matrix[type]]:
        """Converts sample `i` into the internal buffers and returns them.

        Returns a tuple `(x, y)` holding the features and the label of
        sample `i`. Errors raised by the Python indexing or conversion
        propagate to the caller.
        """
        self.y[0,0] = self.np_y[i][0].to_float64().cast[type]()
        # Fetch the Python row once instead of re-indexing it for every feature.
        let row = self.np_x[i]
        for j in range(n_feats):
            # `x` has a single row, so the destination row is always 0;
            # indexing it with the sample index `i` would write out of bounds.
            self.x[0,j] = row[j].to_float64().cast[type]()
        return Tuple(self.x, self.y)
# Wrap the MNIST training arrays; each sample has 28*28 features stored as float32.
var ds = Dataset[DType.float32, 28*28](x_train, y_train)
# Number of samples, as read from np_x.shape[0] in Dataset.__init__.
print(ds.__len__())
# Fetch sample 5 as an (x, y) tuple of matrices.
var ds_item = ds[5]
# Number of elements in the returned tuple.
print(ds_item.__len__())
50000
2

Let’s take a stab at creating a Dataloader.

@value
struct Dataloader[type: DType, n_feats: Int]:
    """Iterates over a `Dataset` in consecutive batches of `bs` samples.

    `len` holds the number of batches still to be produced and `current` the
    index of the next batch. `xb` (bs x n_feats) and `yb` (bs x 1) are reusable
    batch buffers. Samples beyond the last full batch are not visited, because
    the batch count is `ds.__len__() // bs`.
    """
    var len: Int
    var bs: Int
    var ds: Dataset[type, n_feats]
    var current: Int
    var xb: Matrix[type] 
    var yb: Matrix[type]

    fn __init__(inout self, ds: Dataset[type, n_feats], bs: Int):
        """Stores a copy of `ds` and allocates zeroed batch buffers."""
        self.ds = ds
        self.bs = bs
        self.len = ds.__len__()//bs
        self.current = 0
        self.xb = Matrix[type](bs,n_feats)
        self.xb.zero()
        self.yb = Matrix[type](bs,1)
        self.yb.zero()

    fn __len__(self) -> Int:
        """Returns the number of batches that remain to be produced."""
        return self.len

    fn __iter__(self) -> Self:
        """Returns this loader as its own iterator."""
        return self

    fn __next__(inout self) raises -> Matrix[type]:
        """Fills the batch buffers with the next `bs` samples and returns `xb`.

        Values are read straight from the dataset's Python arrays:
        `Tuple.get` cannot return the memory-only `Matrix` type, so going
        through `self.ds[i]` does not compile, and it would also re-convert a
        whole sample for every single feature. Errors raised by the Python
        indexing or conversion propagate to the caller.
        """
        self.len = self.len - 1
        let start = self.current * self.bs
        for i in range(start, start + self.bs):
            # Row inside the batch buffers; `i` itself is the dataset index and
            # exceeds the `bs` rows of `xb` for every batch after the first.
            let row = i - start
            let sample = self.ds.np_x[i]
            for j in range(n_feats):
                self.xb[row,j] = sample[j].to_float64().cast[type]()
            self.yb[row,0] = self.ds.np_y[i][0].to_float64().cast[type]()
        self.current = self.current + 1
        return self.xb
error: Expression [7]:49:57: invalid call to 'get': result cannot bind generic !mlirtype to memory-only type 'Matrix'
                self.xb[i,j] = self.ds[i].get[0, Matrix]()[0,j]
                               ~~~~~~~~~~~~~~~~~~~~~~~~~^~

/.modular/Kernels/mojo/Builtin/Tuple.mojo:58:5: function declared here
    fn get[i: Int, T: AnyType](self) -> T:
    ^

expression failed to parse (no further compiler diagnostics)