import struct
import numpy as np
from torch.utils.data import Dataset, DataLoader
def softmax(x):
    """Return the softmax of *x* along its last axis.

    The maximum of each row is subtracted before exponentiating. This leaves
    the result mathematically unchanged (the factor cancels in the ratio) but
    keeps ``np.exp`` from overflowing to ``inf`` for large inputs, which would
    otherwise turn the output into ``NaN``.

    Args:
        x: Array-like of scores; normalisation runs over the last axis.

    Returns:
        An array of the same shape whose entries along the last axis are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    ex = np.exp(shifted)
    sum_ex = np.sum(ex, axis=-1, keepdims=True)
    return ex / sum_ex
def load_labels(file):  # load label data
    """Read a label file and return its labels as an int32 array.

    The first 8 bytes of the file are skipped (presumably the IDX header:
    magic number and item count -- confirm against the file format); every
    remaining byte is treated as one unsigned label value.

    Args:
        file: Path of the label file to read.

    Returns:
        A 1-D ``np.int32`` array with one entry per byte after the header.
    """
    with open(file, "rb") as handle:
        raw = handle.read()
    payload = raw[8:]
    # Interpret each byte as uint8, then widen to int32 (astype copies).
    return np.frombuffer(payload, dtype=np.uint8).astype(np.int32)
def load_images(file):  # load image data
    """Read an image file and return one flattened image per row.

    The first 16 bytes are parsed as four big-endian 32-bit integers:
    magic number, number of items, rows and columns. The remaining bytes are
    the pixel values, one unsigned byte each.

    The row width is taken from the header (``rows * cols``) instead of being
    inferred from the payload size, so a truncated or padded payload raises
    instead of silently producing images of the wrong width, and a file with
    zero items yields an empty ``(0, rows * cols)`` array.

    Args:
        file: Path of the image file to read.

    Returns:
        A ``np.uint8`` array of shape ``(num_items, rows * cols)``.

    Raises:
        struct.error: If the file is shorter than the 16-byte header.
        ValueError: If the payload size does not match the header dimensions.
    """
    with open(file, "rb") as f:
        data = f.read()
    _magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    pixels = np.asanyarray(bytearray(data[16:]), dtype=np.uint8)
    return pixels.reshape(num_items, rows * cols)
class Model():
    """Single-layer linear classifier with a softmax output.

    The only parameter is the weight matrix ``w``; there is no bias term.
    """

    def __init__(self, in_features=784, num_classes=10):
        """Create the weight matrix, initialised to all ones.

        Args:
            in_features: Length of each input vector (default 784).
            num_classes: Number of output classes (default 10).
        """
        # With identical weights every class gets the same score, so the
        # initial prediction is uniform over the classes.
        self.w = np.ones(shape=(in_features, num_classes))

    def forward(self, x):
        """Return class probabilities for a batch of inputs.

        Args:
            x: Array whose last dimension matches the first dimension of
                ``self.w``.

        Returns:
            The softmax of ``x @ self.w``.
        """
        logits = x @ self.w
        return softmax(logits)
class MyDataset(Dataset):
    """Dataset pairing each sample with a 10-way one-hot encoded label."""

    def __init__(self, data, label):
        """Store the samples and their labels; both are indexed by position."""
        self.data, self.label = data, label

    def __len__(self):
        """Return the number of labels, used as the dataset size."""
        return len(self.label)

    def __getitem__(self, item):
        """Return ``(sample / 255, one_hot_label)`` for index *item*."""
        scaled = self.data[item] / 255
        target_class = self.label[item]
        # Ten zeros with a single 1 at the position of the label.
        one_hot = [0] * 10
        one_hot[target_class] = 1
        return scaled, np.array(one_hot)
if __name__ == "__main__":
    # Load the raw training and test files from disk.
    train_images = load_images("mnist_data/MNIST/raw/train-images-idx3-ubyte")
    train_label = load_labels("mnist_data/MNIST/raw/train-labels-idx1-ubyte")
    test_images = load_images("mnist_data/MNIST/raw/t10k-images-idx3-ubyte")
    test_label = load_labels("mnist_data/MNIST/raw/t10k-labels-idx1-ubyte")

    # Hyper-parameters.
    batch_size = 20
    epoch = 30
    lr = 0.0005

    train_datasets = MyDataset(train_images, train_label)
    train_dataloader = DataLoader(train_datasets, batch_size, shuffle=True)
    test_datasets = MyDataset(test_images, test_label)
    test_dataloader = DataLoader(test_datasets, batch_size, shuffle=False)

    model = Model()

    for e in range(epoch):
        # ---- training pass ----
        epoch_loss = 0.0
        # Distinct batch names so the full `train_label` array is not shadowed.
        for batch_data, batch_label in train_dataloader:
            # Convert the loader's batches to NumPy so all math stays in NumPy.
            batch_data = np.asarray(batch_data)
            batch_label = np.asarray(batch_label)

            predict = model.forward(batch_data)

            # Cross-entropy summed over the batch. Probabilities are clipped
            # away from zero so log() can never return -inf (0 * -inf = NaN).
            epoch_loss += -np.sum(batch_label * np.log(np.clip(predict, 1e-12, None)))

            # Gradient of the summed cross-entropy w.r.t. the logits, then
            # w.r.t. the weights.
            G = predict - batch_label
            delt_w = batch_data.T @ G
            model.w -= lr * delt_w

        # ---- evaluation pass on the test set (once per epoch) ----
        right_num = 0
        for test_data, test_target in test_dataloader:  # batch_size * 784
            test_data = np.asarray(test_data)
            test_target = np.asarray(test_target)

            predict = model.forward(test_data)
            predict = np.argmax(predict, axis=-1)
            test_target = np.argmax(test_target, axis=-1)
            right_num += np.sum(test_target == predict)

        acc = right_num / len(test_datasets)
        # Mean per-sample loss; correct even if the last batch is smaller.
        print("epoch:", e, "loss:", epoch_loss / len(train_datasets))
        print("acc:", acc)
# NOTE: keep variable types consistent throughout the whole pipeline: use either NumPy arrays everywhere or torch tensors everywhere.