kaggle地址

数据是很普通的 MNIST(28*28),即一张 28*28 的 256 级灰度图,要求你识别出其中的数字

操作系统:世界上最垃圾的操作系统(我还没说是哪个呢)

简单CNN Acc:0.99

CODE
参考&部分代码

数据准备

首先我们来实现Baseline(Acc=0.99)

读入数据,我们使用 pandas 读入数据

1
2
# Load the raw Kaggle CSVs: one row per image, 784 pixel columns (+ a
# 'label' column in the training file).
train_origin = pd.read_csv(PATH+TRAIN)
test_origin = pd.read_csv(PATH+TEST)

由于我们的训练集是只有一个label,但是我们神经网络需要输出一个10维向量(代表 0 to 9),因此我们改写一下数据集,把 label 转化成 onehot 编码的

其实并不需要输出一个十维的,也可以就输出一个数字,但是一般不会这么写

1
one_hot_embedding =  pd.get_dummies(train_origin.label,prefix='y')

然后我们把这个编码出来的合并到原来训练集中,并且删除原来的label标签

1
2
3
4
5
# axis=1 concatenates column-wise (side by side), so the y_0..y_9
# columns end up in front of the 784 pixel columns.
train_origin = pd.concat([one_hot_embedding,train_origin],axis=1)

# axis: 0 -> rows (index), 1 -> columns; drop the now-redundant 'label'.
train_origin = train_origin.drop(['label'],axis=1)

现在就是划分训练集和测试集,我们使用 sklearn 中的 train_test_split函数。
stratify 表示是否按照标签的分布来划分 train 和 val

1
2
3
4
5
6
7
8
# stratify on the one-hot label columns so train and validation keep the
# same class distribution.
x_train, x_val, y_train, y_val = train_test_split(train_origin.iloc[:, 10:], train_origin.iloc[:, 0:10], train_size = 0.92, stratify=train_origin.iloc[:, 0:10])
# train_origin.iloc[:, 0:10] are the y_0..y_9 columns built above; we want
# both splits to follow that label distribution.
# train_size = 0.92 puts 92% of the rows in the training set, the rest in
# the validation set.
# Dividing by 255 is not strictly required, but scaling pixels to [0, 1]
# is the conventional normalization.
x_train = x_train/255.0
x_val = x_val/255.0
train_dataset = pd.concat([x_train, y_train], axis=1).reset_index(drop=True)
val_dataset = pd.concat([x_val, y_val], axis=1).reset_index(drop=True)

这个是 Dataframe 不多说

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
class MnistDataset(Dataset):
    """PyTorch ``Dataset`` over an MNIST dataframe.

    Each row of ``df`` holds 784 pixel columns followed by any label
    columns (the one-hot y_0..y_9 for train/val; none for the test set).

    Args:
        df (pd.DataFrame): Training, validation, or test dataframe.
        transform: Optional Albumentations pipeline (``A.Compose``)
            applied to each 28x28 image. May be None.
    """

    def __init__(self, df: pd.DataFrame, transform: "A.Compose" = None):
        self.df = df
        self.transform = transform

    def __len__(self) -> int:
        """Return the number of samples (rows) in the dataframe."""
        return self.df.shape[0]

    def __getitem__(self, idx: int) -> Tuple["torch.Tensor", "np.ndarray"]:
        # Split the row into 784 pixel values and whatever label columns
        # follow (an empty slice for the unlabeled test set).
        pixel_series, label = self.df.iloc[idx, :784], self.df.iloc[idx, 784:]

        image = pixel_series.to_numpy().reshape(28, 28)

        # BUG FIX: the original referenced ``transformed`` even when no
        # transform was supplied, raising NameError. Fall back to the raw
        # 28x28 array when ``transform`` is None (then the return is an
        # ndarray rather than a tensor).
        if self.transform:
            image = self.transform(image=image)["image"]

        return image, np.array(label)

接下来我们加载数据

numpy 和 torch 对于图像的表示不同:numpy 是 HWC(Height, Width, Channel),而 torch 是 CHW,因此需要做 ToTensor 转换
我并不知道 totensorV2后还会不会共享内存,但是它真的快,虽然这模型没跑到GPU瓶颈。

1
2
3
4
5
6
7
8
9
# Random rotation augmentation for the training set: with probability 0.8,
# rotate by up to +/- 17.5 degrees. (The original comment claimed 25
# degrees at p=0.5, which did not match the code.) The validation set
# only gets the tensor conversion.

train_pytorch_dataset = MnistDataset(df=train_dataset, transform=A.Compose([
A.Rotate(limit=17.5, p=0.8),
ToTensorV2(),
]))
validate_pytorch_dataset = MnistDataset(df=val_dataset, transform=A.Compose([
ToTensorV2(),
]))

加载数据

1
2
3
4
5
6
7
8
9
10
# DataLoaders over the two datasets.
# NOTE(review): shuffle=True on the validation loader is harmless but
# unnecessary — metrics are averaged over all batches anyway.
train_dataloader = DataLoader(train_pytorch_dataset, batch_size=64, shuffle=True,num_workers=NUM_WORKERS)
val_dataloader = DataLoader(validate_pytorch_dataset, batch_size=64, shuffle=True,num_workers=NUM_WORKERS)

# hyperparameters

loss_fn = nn.CrossEntropyLoss()
epochs = 20
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = Simple_CNN(input_shape=(1, 28, 28), output_classes=10).to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=0.0003)

CNN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class Simple_CNN(nn.Module):
    """Small two-conv-block CNN for 28x28 MNIST digits.

    Architecture: Conv(7x7, pad 2) -> BN -> ReLU -> MaxPool(2), then
    Conv(3x3) -> BN -> ReLU -> MaxPool(2), flatten, two FC blocks with
    BatchNorm1d + Dropout(0.6), and a final linear classifier.

    Args:
        input_shape: (channels, height, width) of one input image.
        output_classes: number of target classes.
    """

    def __init__(self, input_shape: tuple = (1, 28, 28), output_classes: int = 10):
        super().__init__()

        self.conv_layer_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape[0], out_channels=32, kernel_size=7, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Track the spatial size after each block so the first Linear
        # layer can be sized without hard-coding the flattened width.
        output_h, output_w = self._calculate_output_shape(
            input_shape=(input_shape[1], input_shape[2]),
            sequential_block=self.conv_layer_1,
        )

        self.conv_layer_2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        output_h, output_w = self._calculate_output_shape(
            input_shape=(output_h, output_w),
            sequential_block=self.conv_layer_2,
        )

        self.flatten = nn.Flatten()

        self.fc = nn.Sequential(
            nn.Linear(output_h * output_w * 64, 4096),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.6),
        )

        self.fc1 = nn.Sequential(
            nn.Linear(4096, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.6),
        )

        self.classifier = nn.Linear(1024, output_classes)

    def _calculate_output_shape(self, input_shape: Tuple[int, int], sequential_block: nn.Sequential) -> Tuple[int, int]:
        """Propagate an (H, W) spatial shape through a conv/pool block.

        BUG FIX: the original expressions were malformed — the two
        dimension terms were juxtaposed as ``(a)(b)`` (calling an int,
        TypeError at construction time) and omitted the stride division
        and the "+ 1" of the standard conv-arithmetic formula
        out = (in - kernel + 2 * padding) // stride + 1.
        """
        output_shape = input_shape

        for layer in sequential_block:
            if isinstance(layer, nn.Conv2d):
                # Conv2d stores kernel_size/padding/stride as tuples.
                output_shape = (
                    (output_shape[0] - layer.kernel_size[0] + 2 * layer.padding[0]) // layer.stride[0] + 1,
                    (output_shape[1] - layer.kernel_size[1] + 2 * layer.padding[1]) // layer.stride[1] + 1,
                )

            if isinstance(layer, nn.MaxPool2d):
                # MaxPool2d stores kernel_size/stride as plain ints when
                # constructed with scalars, and floors by default.
                output_shape = (
                    (output_shape[0] - layer.kernel_size) // layer.stride + 1,
                    (output_shape[1] - layer.kernel_size) // layer.stride + 1,
                )

        return output_shape

    def forward(self, x):
        """Run the network; returns raw class logits of shape (N, classes)."""
        x = self.conv_layer_1(x)
        x = self.conv_layer_2(x)
        x = self.flatten(x)
        x = self.fc(x)
        x = self.fc1(x)
        x = self.classifier(x)
        return x

train

1
2
3
4
5
6
7
# Kick off training; fit() returns per-epoch accuracy/loss histories for
# both the training and validation sets.
train_acc, train_loss, val_acc, val_loss = fit(model=net,
loss_fn=loss_fn,
optimizer=optimizer,
epochs=epochs,
train_dataloader=train_dataloader,
val_dataloader=val_dataloader,
device=device)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
def fit(model, loss_fn, optimizer, epochs, train_dataloader, val_dataloader, device,
        checkpoint_path=None):
    """Train ``model`` for ``epochs`` epochs, keeping the best checkpoint.

    After each epoch the model is evaluated on ``val_dataloader``; if
    validation accuracy dropped below the best seen so far, the previous
    best weights are restored from ``checkpoint_path``, otherwise the new
    weights are saved as the best.

    Args:
        model: the network to optimize (already moved to ``device``).
        loss_fn: criterion; expects logits and one-hot float targets.
        optimizer: optimizer over ``model.parameters()``.
        epochs: number of passes over ``train_dataloader``.
        train_dataloader: yields (inputs, one_hot_labels) batches.
        val_dataloader: yields (inputs, one_hot_labels) batches.
        device: torch device to run on.
        checkpoint_path: where to store the best weights. Defaults to a
            file in the system temp directory. (BUG FIX: the original
            hard-coded an absolute Windows path, which breaks on any
            other machine.)

    Returns:
        (train_acc, train_loss, val_acc, val_loss): per-epoch averages.
    """
    import os
    import tempfile

    if checkpoint_path is None:
        checkpoint_path = os.path.join(tempfile.gettempdir(), "model.pth")
    # Seed the checkpoint with the initial weights so the first rollback
    # (if any) has something to restore.
    torch.save(model.state_dict(), checkpoint_path)

    # BUG FIX: the original called loss_fn.cuda(), which crashes on
    # CPU-only machines; .to(device) honors the caller's device choice.
    loss_fn = loss_fn.to(device)

    train_acc, train_loss, val_acc, val_loss = [], [], [], []

    lastbest = 0.0  # best validation accuracy seen so far

    for epoch in range(epochs):

        # BUG FIX: train() must be re-enabled every epoch. The original
        # called it once before the loop, so after the first epoch's
        # eval() all later epochs trained in eval mode (dropout off,
        # batch-norm statistics frozen).
        model.train()

        train_loss_batch, train_acc_batch = 0, 0
        val_loss_batch, val_acc_batch = 0, 0
        tmp = 0       # batches processed this epoch
        looker = 0    # running average training accuracy
        for inputs, labels in train_dataloader:

            tmp = tmp + 1

            inputs = inputs.to(device).to(torch.float32)
            labels = labels.to(device).to(torch.float32)

            # Zero the parameter gradients before the backward pass.
            optimizer.zero_grad()

            output = model(inputs)

            loss = loss_fn(output, labels)

            loss.backward()
            optimizer.step()

            train_loss_batch += loss.item()

            # torch.max returns (values, indices); only indices needed.
            _, predicted = torch.max(output, 1)

            # Recover the class index from the one-hot label row.
            _, y_true = torch.max(labels, 1)

            correct = (predicted == y_true).sum().item()

            train_acc_batch += correct / len(labels)

            looker = train_acc_batch / tmp

            # In-place progress line, overwritten each batch via '\r'.
            print('\r [{:03d}/{:03d}] {:03d} Train Acc: {:3.6f} '.format(
                epoch + 1, epochs, tmp, looker
            ), end='')

        train_average_accuracy = train_acc_batch / len(train_dataloader)
        train_average_loss = train_loss_batch / len(train_dataloader)

        train_loss.append(train_average_loss)
        train_acc.append(train_average_accuracy)

        model.eval()

        with torch.no_grad():
            for inputs, labels in val_dataloader:
                inputs = inputs.to(device).to(torch.float32)
                labels = labels.to(device).to(torch.float32)

                output = model(inputs)
                loss = loss_fn(output, labels)

                val_loss_batch += loss.item()

                _, predicted = torch.max(output, 1)

                _, y_true = torch.max(labels, 1)

                correct = (predicted == y_true).sum().item()

                val_acc_batch += correct / len(labels)

        val_average_accuracy = val_acc_batch / len(val_dataloader)
        val_average_loss = val_loss_batch / len(val_dataloader)

        val_loss.append(val_average_loss)
        val_acc.append(val_average_accuracy)

        # BUG FIX: the original passed the values as extra print() args
        # instead of formatting them into the string.
        print('\n{:3.6f} :{:3.6f}\n '.format(val_average_accuracy, lastbest))

        # Keep only the best model: roll back if validation got worse.
        if val_average_accuracy < lastbest:
            model.load_state_dict(torch.load(checkpoint_path))
            print('[{:03d}/{:03d}] |Acc: {:3.6f} (unchanged)'.format(
                epoch + 1, epochs, lastbest
            ), end='\n')
        else:
            torch.save(model.state_dict(), checkpoint_path)
            lastbest = val_average_accuracy
            print('[{:03d}/{:03d}] | train Acc: {:3.6f} loss: {:3.6f} | test Acc: {:3.6f} loss: {:3.6f}'.format(
                epoch + 1, epochs, train_average_accuracy, train_average_loss, val_average_accuracy, val_average_loss,
            ), end='\n')

    return train_acc, train_loss, val_acc, val_loss

测试测试集

最后是对数据进行验证

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Inference-time pipeline: no augmentation, only tensor conversion.
# (Other augmentations, e.g. random affine, could be added to the Compose
# for test-time augmentation.)

PAT = PATH+MODEL
# NOTE(review): saving and immediately reloading the same state_dict is a
# no-op; presumably a leftover from checkpoint experiments.
torch.save(net.state_dict(),PAT)
net.load_state_dict(torch.load(PAT))

# Apply the same [0, 1] scaling used for the training data.
test_origin/=255.0

test_pytorch_dataset = MnistDataset(df=test_origin, transform=A.Compose([
ToTensorV2(),
]))

# batch_size=1 so each prediction maps to one submission row.
test_dataloader = DataLoader(test_pytorch_dataset, batch_size=1, shuffle=False,num_workers=NUM_WORKERS)

def test(model, test_dataloader, device):

ids = []
preds = []

model.eval()

tmp = 0
target = len((test_dataloader))

with torch.no_grad():
for idx, (inputs, labels) in enumerate(test_dataloader):
tmp += 1
inputs = inputs.to(device)
labels = labels.to(device)

inputs = inputs.to(torch.float32)
labels = labels.to(torch.float32)

output = model(inputs)

_, predicted = torch.max(output, 1)

print('\r[{:03d}/{:03d}] '.format(
tmp , target
), end='')

ids.append(idx + 1)
preds.append(predicted.item())

return ids, preds

# Run inference and write the Kaggle submission file.
ids, pred = test(model=net, test_dataloader=test_dataloader, device=device)

preds_df = pd.DataFrame({"ImageId": ids, "Label": pred})

preds_df.to_csv(PATH+SUBMISSION, index=False)

# Compare the submission against a local answer file to estimate the score.
getscore1 = pd.read_csv(PATH+'answer.csv')
getscore1 = getscore1.loc[ : ,"Label"]
# To sanity-check on a subset, truncate the answers first:
# getscore1 = getscore1[:999]
#
getscore2 = pd.read_csv(PATH+SUBMISSION)
getscore2 = getscore2.loc[ : ,"Label"]

# Fraction of rows where our predicted label matches the answer file.
tot = (getscore1==getscore2).sum()
score = tot / len(getscore2)
print(score)