forked from kira7005/MLP_Anomaly_Detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathout_log
2011 lines (2011 loc) · 102 KB
/
out_log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
_create_mixer
Pretrained= False
default_Cfgs= {'url': '', 'num_classes': 2, 'input_size': (3, 224, 224), 'pool_size': None, 'crop_pct': 0.875, 'interpolation': 'bicubic', 'fixed_input_size': True, 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), 'first_conv': 'stem.proj', 'classifier': 'head'}
dataset_len= 288
True
<class 'torch.utils.data.dataloader.DataLoader'>
dataset_len= 32
False
<class 'torch.utils.data.dataloader.DataLoader'>
------Training-------
batch= 0
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6931, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 1
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6927, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926,
0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926,
0.6926, 0.6926, 0.6926, 0.6925, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926,
0.6926, 0.6926, 0.6925, 0.6926, 0.6937], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6927, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 2
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6916, 0.6916, 0.6918, 0.6917, 0.6918, 0.6916, 0.6916, 0.6918, 0.6918,
0.6917, 0.6917, 0.6917, 0.6917, 0.6917, 0.6916, 0.6917, 0.6917, 0.6916,
0.6947, 0.6917, 0.6917, 0.6917, 0.6917, 0.6917, 0.6917, 0.6917, 0.6918,
0.6916, 0.6916, 0.6916, 0.6917, 0.6918], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931,
0.6931, 0.6931, 0.6931, 0.6931, 0.6931], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6919, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 3
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6903, 0.6907, 0.6903, 0.6904, 0.6907, 0.6906, 0.6907, 0.6903, 0.6904,
0.6905, 0.6903, 0.6959, 0.6904, 0.6906, 0.6905, 0.6906, 0.6905, 0.6905,
0.6906, 0.6904, 0.6906, 0.6905, 0.6906, 0.6960, 0.6903, 0.6907, 0.6905,
0.6906, 0.6905, 0.6907, 0.6907, 0.6907], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6911, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 4
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6894, 0.6894, 0.6889, 0.6894, 0.6889, 0.6975, 0.6893, 0.6889, 0.6891,
0.6892, 0.6896, 0.6973, 0.6976, 0.6890, 0.6893, 0.6889, 0.6888, 0.6889,
0.6894, 0.6888, 0.6974, 0.6974, 0.6890, 0.6896, 0.6972, 0.6887, 0.6892,
0.6889, 0.6890, 0.6888, 0.6893, 0.6892], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6909, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 5
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6876, 0.6878, 0.6880, 0.6876, 0.6877, 0.6878, 0.6880, 0.6989, 0.6879,
0.6873, 0.6884, 0.6877, 0.6876, 0.6876, 0.6879, 0.6877, 0.6876, 0.6882,
0.6875, 0.6875, 0.6877, 0.6881, 0.6877, 0.6991, 0.6880, 0.6875, 0.6881,
0.6987, 0.6874, 0.6877, 0.6880, 0.6884], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6893, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 6
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6862, 0.6858, 0.6861, 0.7007, 0.6857, 0.6864, 0.6866, 0.6860, 0.6861,
0.6862, 0.6862, 0.6863, 0.6864, 0.6863, 0.6857, 0.6863, 0.6859, 0.6862,
0.6857, 0.7008, 0.6860, 0.6869, 0.6860, 0.7005, 0.6861, 0.6857, 0.6866,
0.7008, 0.6857, 0.6856, 0.6855, 0.6864], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6884, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 7
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6848, 0.7023, 0.7028, 0.6848, 0.6849, 0.6848, 0.7029, 0.6839, 0.6837,
0.6838, 0.6837, 0.6848, 0.6839, 0.6842, 0.6838, 0.6844, 0.6843, 0.6847,
0.6836, 0.6840, 0.7021, 0.6838, 0.6836, 0.6836, 0.7027, 0.6849, 0.7023,
0.7023, 0.6842, 0.6848, 0.6843, 0.6844], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6887, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 8
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6833, 0.6820, 0.6809, 0.7045, 0.6832, 0.6825, 0.6827, 0.6829, 0.7038,
0.7045, 0.6832, 0.6833, 0.6839, 0.6829, 0.6834, 0.6840, 0.6829, 0.7037,
0.6833, 0.6829, 0.6827, 0.6828, 0.6829, 0.6821, 0.6829, 0.6834, 0.6828,
0.6823, 0.6835, 0.6822, 0.6825, 0.6829], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6863, device='cuda:0', grad_fn=<MeanBackward0>)
------Validating-------
output = tensor([[0.5123, 0.4877],
[0.5117, 0.4883],
[0.5118, 0.4882],
[0.5118, 0.4882],
[0.5124, 0.4876],
[0.5122, 0.4878],
[0.5119, 0.4881],
[0.5127, 0.4873],
[0.5123, 0.4877],
[0.5121, 0.4879],
[0.5120, 0.4880],
[0.5121, 0.4879],
[0.5122, 0.4878],
[0.5125, 0.4875],
[0.5128, 0.4872],
[0.5126, 0.4874],
[0.5124, 0.4876],
[0.5124, 0.4876],
[0.5124, 0.4876],
[0.5124, 0.4876],
[0.5123, 0.4877],
[0.5122, 0.4878],
[0.5120, 0.4880],
[0.5124, 0.4876],
[0.5120, 0.4880],
[0.5119, 0.4881],
[0.5117, 0.4883],
[0.5119, 0.4881],
[0.5118, 0.4882],
[0.5119, 0.4881],
[0.5117, 0.4883],
[0.5124, 0.4876]], device='cuda:0')
target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
eval_loss= tensor(0.7054, device='cuda:0')
eval_acc1= tensor(0., device='cuda:0')
------Training-------
batch= 0
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.6801, 0.6811, 0.6807, 0.6810, 0.6798, 0.6802, 0.6811, 0.6811, 0.6799,
0.6807, 0.7065, 0.6818, 0.6802, 0.6824, 0.6808, 0.6806, 0.6810, 0.6797,
0.6803, 0.6806, 0.7064, 0.6804, 0.6800, 0.7054, 0.6813, 0.6809, 0.6811,
0.6816, 0.6820, 0.6803, 0.6800, 0.6813], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932,
0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.6841, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 1
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.4440, 0.4431, 0.4423, 1.0512, 0.4354, 0.4477, 0.4348, 0.4372, 0.4326,
0.4353, 0.4288, 1.0622, 0.4329, 0.4263, 0.4323, 0.4431, 0.4356, 0.4367,
0.4443, 0.4378, 0.4332, 0.4242, 0.4547, 0.4266, 0.4205, 0.4291, 0.4446,
0.4455, 0.4256, 0.4609, 0.4424, 0.4271], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.7349, 0.7352, 0.7356, 0.7406, 0.7383, 0.7334, 0.7386, 0.7376, 0.7395,
0.7384, 0.7412, 0.7432, 0.7394, 0.7422, 0.7396, 0.7352, 0.7383, 0.7378,
0.7348, 0.7373, 0.7393, 0.7432, 0.7308, 0.7421, 0.7449, 0.7410, 0.7346,
0.7343, 0.7426, 0.7285, 0.7355, 0.7419], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5018, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 2
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([1.2373, 0.3479, 1.2251, 0.3466, 0.3493, 0.3496, 0.3549, 0.3486, 0.3413,
0.3564, 0.3540, 0.3417, 0.3526, 0.3504, 0.3545, 0.3465, 0.3580, 0.3578,
0.3433, 0.3538, 0.3425, 0.3502, 1.2251, 1.2219, 0.3512, 0.3503, 0.3460,
0.3590, 0.3446, 0.3562, 0.3484, 1.2262], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.7900, 0.7863, 0.7864, 0.7872, 0.7854, 0.7852, 0.7815, 0.7858, 0.7910,
0.7805, 0.7821, 0.7907, 0.7831, 0.7846, 0.7818, 0.7873, 0.7795, 0.7796,
0.7896, 0.7823, 0.7901, 0.7847, 0.7864, 0.7855, 0.7841, 0.7847, 0.7877,
0.7788, 0.7887, 0.7806, 0.7860, 0.7868], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5170, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 3
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3278, 0.3256, 0.3231, 0.3233, 0.3251, 0.3253, 0.3262, 0.3278, 0.3243,
0.3225, 0.3259, 0.3254, 0.3295, 0.3260, 0.3256, 0.3262, 0.3250, 0.3260,
0.3236, 0.3283, 0.3268, 0.3250, 0.3323, 0.3233, 0.3267, 0.3259, 0.3223,
0.3228, 0.3229, 0.3225, 0.3246, 0.3310], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8013, 0.8030, 0.8051, 0.8049, 0.8034, 0.8033, 0.8026, 0.8013, 0.8041,
0.8055, 0.8028, 0.8032, 0.8000, 0.8027, 0.8031, 0.8026, 0.8035, 0.8027,
0.8046, 0.8009, 0.8021, 0.8035, 0.7978, 0.8049, 0.8022, 0.8028, 0.8057,
0.8053, 0.8052, 0.8056, 0.8038, 0.7988], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.3733, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 4
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3171, 0.3167, 0.3177, 0.3159, 0.3175, 1.3035, 0.3192, 0.3186, 0.3171,
0.3171, 0.3168, 0.3180, 0.3172, 1.3040, 0.3188, 0.3205, 0.3203, 0.3178,
1.3052, 0.3161, 0.3166, 0.3167, 1.3039, 1.3045, 0.3166, 0.3163, 1.3048,
1.3016, 1.3018, 0.3161, 0.3171, 0.3150], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8100, 0.8104, 0.8095, 0.8110, 0.8096, 0.8102, 0.8083, 0.8087, 0.8100,
0.8100, 0.8103, 0.8092, 0.8099, 0.8103, 0.8086, 0.8071, 0.8073, 0.8094,
0.8107, 0.8108, 0.8104, 0.8104, 0.8103, 0.8105, 0.8104, 0.8107, 0.8106,
0.8096, 0.8097, 0.8108, 0.8100, 0.8117], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5885, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 5
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3142, 0.3149, 0.3155, 0.3143, 1.3105, 0.3147, 1.3086, 0.3151, 1.3093,
1.3094, 0.3152, 0.3146, 0.3145, 0.3144, 0.3151, 0.3150, 1.3094, 0.3147,
0.3158, 0.3148, 0.3149, 0.3150, 0.3157, 0.3144, 0.3144, 0.3155, 0.3151,
0.3148, 0.3144, 0.3150, 0.3159, 0.3147], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8125, 0.8118, 0.8114, 0.8123, 0.8124, 0.8120, 0.8118, 0.8117, 0.8120,
0.8120, 0.8116, 0.8122, 0.8122, 0.8122, 0.8117, 0.8118, 0.8121, 0.8120,
0.8111, 0.8120, 0.8119, 0.8118, 0.8112, 0.8123, 0.8123, 0.8113, 0.8117,
0.8119, 0.8123, 0.8118, 0.8110, 0.8120], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5045, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 6
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3139, 0.3137, 0.3137, 0.3138, 0.3138, 1.3116, 0.3138, 0.3139, 0.3140,
0.3139, 1.3113, 0.3137, 0.3137, 0.3140, 0.3139, 0.3138, 0.3143, 0.3143,
0.3139, 0.3140, 0.3138, 0.3138, 0.3138, 0.3140, 0.3139, 1.3122, 0.3137,
0.3138, 0.3141, 0.3138, 1.3120, 0.3139], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8127, 0.8129, 0.8129, 0.8128, 0.8128, 0.8127, 0.8128, 0.8127, 0.8126,
0.8127, 0.8126, 0.8129, 0.8129, 0.8126, 0.8127, 0.8128, 0.8124, 0.8124,
0.8127, 0.8127, 0.8128, 0.8128, 0.8128, 0.8126, 0.8127, 0.8129, 0.8129,
0.8128, 0.8126, 0.8128, 0.8129, 0.8127], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4760, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 7
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3135, 0.3134, 0.3135, 0.3136, 0.3136, 0.3135, 0.3139, 0.3135, 0.3134,
0.3134, 0.3134, 0.3135, 0.3136, 0.3135, 0.3134, 0.3135, 1.3126, 0.3137,
0.3135, 0.3138, 0.3134, 0.3135, 0.3138, 0.3134, 0.3135, 0.3136, 0.3134,
0.3135, 1.3127, 0.3135, 0.3134, 0.3135], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8130, 0.8131, 0.8131, 0.8130, 0.8130, 0.8131, 0.8127, 0.8131, 0.8131,
0.8131, 0.8131, 0.8131, 0.8130, 0.8131, 0.8131, 0.8131, 0.8131, 0.8129,
0.8131, 0.8128, 0.8131, 0.8130, 0.8128, 0.8131, 0.8131, 0.8130, 0.8131,
0.8131, 0.8131, 0.8130, 0.8131, 0.8131], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4197, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 8
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3135, 0.3134, 0.3134, 0.3133, 0.3134, 1.3129, 1.3130, 0.3133, 0.3135,
0.3133, 0.3134, 0.3134, 0.3134, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3130, 0.3134, 0.3133, 0.3134, 0.3134, 0.3134, 0.3134, 0.3133, 0.3133,
0.3134, 0.3134, 0.3134, 0.3134, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8131, 0.8132, 0.8132, 0.8132, 0.8131, 0.8131, 0.8132, 0.8132, 0.8130,
0.8132, 0.8131, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132,
0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8131, 0.8131, 0.8132, 0.8132,
0.8131, 0.8131, 0.8131, 0.8132, 0.8132], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4477, device='cuda:0', grad_fn=<MeanBackward0>)
------Validating-------
output = tensor([[9.9987e-01, 1.3297e-04],
[9.9981e-01, 1.9078e-04],
[9.9982e-01, 1.7943e-04],
[9.9982e-01, 1.7781e-04],
[9.9988e-01, 1.2462e-04],
[9.9986e-01, 1.3894e-04],
[9.9983e-01, 1.7157e-04],
[9.9989e-01, 1.0836e-04],
[9.9987e-01, 1.3451e-04],
[9.9985e-01, 1.4906e-04],
[9.9984e-01, 1.6087e-04],
[9.9984e-01, 1.5916e-04],
[9.9985e-01, 1.4523e-04],
[9.9988e-01, 1.1763e-04],
[9.9990e-01, 9.5136e-05],
[9.9989e-01, 1.0943e-04],
[9.9987e-01, 1.3151e-04],
[9.9987e-01, 1.2863e-04],
[9.9988e-01, 1.2315e-04],
[9.9988e-01, 1.2338e-04],
[9.9986e-01, 1.3644e-04],
[9.9986e-01, 1.4150e-04],
[9.9984e-01, 1.6069e-04],
[9.9988e-01, 1.2354e-04],
[9.9984e-01, 1.5766e-04],
[9.9983e-01, 1.7292e-04],
[9.9981e-01, 1.9048e-04],
[9.9982e-01, 1.7739e-04],
[9.9981e-01, 1.8760e-04],
[9.9983e-01, 1.7279e-04],
[9.9981e-01, 1.9314e-04],
[9.9987e-01, 1.2820e-04]], device='cuda:0')
target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
eval_loss= tensor(1.3130, device='cuda:0')
eval_acc1= tensor(0., device='cuda:0')
------Training-------
batch= 0
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([1.3131, 1.3131, 0.3133, 0.3133, 0.3133, 1.3130, 0.3133, 0.3133, 0.3134,
0.3133, 0.3133, 0.3133, 0.3133, 0.3134, 0.3133, 0.3133, 1.3131, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3134, 0.3133, 0.3133, 0.3133, 1.3131], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8131,
0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132,
0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132,
0.8132, 0.8132, 0.8132, 0.8132, 0.8132], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5039, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 1
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 1.3132, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3132, 0.3133, 0.3133,
0.3133, 1.3132, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8133, 0.8132,
0.8133, 0.8133, 0.8133, 0.8133, 0.8132, 0.8132, 0.8132, 0.8133, 0.8133,
0.8132, 0.8133, 0.8133, 0.8132, 0.8133, 0.8132, 0.8132, 0.8132, 0.8133,
0.8132, 0.8133, 0.8133, 0.8132, 0.8132], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 2
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3132,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8132, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 3
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 4
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5039, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 5
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 0.3133,
0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5601, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 6
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.3633, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 7
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4758, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 8
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4195, device='cuda:0', grad_fn=<MeanBackward0>)
------Validating-------
output = tensor([[1.0000e+00, 3.4283e-08],
[1.0000e+00, 6.0943e-08],
[1.0000e+00, 5.2414e-08],
[1.0000e+00, 5.2234e-08],
[1.0000e+00, 2.9134e-08],
[1.0000e+00, 3.5439e-08],
[1.0000e+00, 5.2968e-08],
[1.0000e+00, 2.4628e-08],
[1.0000e+00, 3.3891e-08],
[1.0000e+00, 3.9833e-08],
[1.0000e+00, 4.6143e-08],
[1.0000e+00, 4.5376e-08],
[1.0000e+00, 3.9571e-08],
[1.0000e+00, 2.7552e-08],
[1.0000e+00, 1.9134e-08],
[1.0000e+00, 2.4356e-08],
[1.0000e+00, 3.3188e-08],
[1.0000e+00, 3.1166e-08],
[1.0000e+00, 2.8297e-08],
[1.0000e+00, 2.8663e-08],
[1.0000e+00, 3.4569e-08],
[1.0000e+00, 3.7047e-08],
[1.0000e+00, 4.4655e-08],
[1.0000e+00, 2.7564e-08],
[1.0000e+00, 4.3321e-08],
[1.0000e+00, 5.1786e-08],
[1.0000e+00, 6.0503e-08],
[1.0000e+00, 5.4386e-08],
[1.0000e+00, 5.9361e-08],
[1.0000e+00, 5.2789e-08],
[1.0000e+00, 6.3719e-08],
[1.0000e+00, 3.1184e-08]], device='cuda:0')
target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
eval_loss= tensor(1.3133, device='cuda:0')
eval_acc1= tensor(0., device='cuda:0')
------Training-------
batch= 0
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5039, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 1
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5039, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 2
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 3
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4758, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 4
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 5
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 6
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4758, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 7
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 8
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4195, device='cuda:0', grad_fn=<MeanBackward0>)
------Validating-------
output = tensor([[1.0000e+00, 3.4280e-08],
[1.0000e+00, 6.0938e-08],
[1.0000e+00, 5.2409e-08],
[1.0000e+00, 5.2230e-08],
[1.0000e+00, 2.9132e-08],
[1.0000e+00, 3.5436e-08],
[1.0000e+00, 5.2964e-08],
[1.0000e+00, 2.4626e-08],
[1.0000e+00, 3.3888e-08],
[1.0000e+00, 3.9830e-08],
[1.0000e+00, 4.6140e-08],
[1.0000e+00, 4.5372e-08],
[1.0000e+00, 3.9568e-08],
[1.0000e+00, 2.7549e-08],
[1.0000e+00, 1.9132e-08],
[1.0000e+00, 2.4354e-08],
[1.0000e+00, 3.3185e-08],
[1.0000e+00, 3.1164e-08],
[1.0000e+00, 2.8295e-08],
[1.0000e+00, 2.8660e-08],
[1.0000e+00, 3.4566e-08],
[1.0000e+00, 3.7044e-08],
[1.0000e+00, 4.4652e-08],
[1.0000e+00, 2.7561e-08],
[1.0000e+00, 4.3318e-08],
[1.0000e+00, 5.1782e-08],
[1.0000e+00, 6.0497e-08],
[1.0000e+00, 5.4381e-08],
[1.0000e+00, 5.9356e-08],
[1.0000e+00, 5.2785e-08],
[1.0000e+00, 6.3714e-08],
[1.0000e+00, 3.1181e-08]], device='cuda:0')
target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
eval_loss= tensor(1.3133, device='cuda:0')
eval_acc1= tensor(0., device='cuda:0')
------Training-------
batch= 0
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4758, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 1
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5039, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 2
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4758, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 3
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4758, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 4
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.3914, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 5
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 1.3133,
0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5320, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 6
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133,
0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4758, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 7
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.3914, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 8
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
------Validating-------
output = tensor([[1.0000e+00, 3.4279e-08],
[1.0000e+00, 6.0936e-08],
[1.0000e+00, 5.2408e-08],
[1.0000e+00, 5.2228e-08],
[1.0000e+00, 2.9131e-08],
[1.0000e+00, 3.5435e-08],
[1.0000e+00, 5.2962e-08],
[1.0000e+00, 2.4625e-08],
[1.0000e+00, 3.3888e-08],
[1.0000e+00, 3.9828e-08],
[1.0000e+00, 4.6138e-08],
[1.0000e+00, 4.5371e-08],
[1.0000e+00, 3.9567e-08],
[1.0000e+00, 2.7549e-08],
[1.0000e+00, 1.9132e-08],
[1.0000e+00, 2.4353e-08],
[1.0000e+00, 3.3184e-08],
[1.0000e+00, 3.1163e-08],
[1.0000e+00, 2.8294e-08],
[1.0000e+00, 2.8660e-08],
[1.0000e+00, 3.4565e-08],
[1.0000e+00, 3.7043e-08],
[1.0000e+00, 4.4650e-08],
[1.0000e+00, 2.7560e-08],
[1.0000e+00, 4.3316e-08],
[1.0000e+00, 5.1780e-08],
[1.0000e+00, 6.0495e-08],
[1.0000e+00, 5.4379e-08],
[1.0000e+00, 5.9354e-08],
[1.0000e+00, 5.2783e-08],
[1.0000e+00, 6.3712e-08],
[1.0000e+00, 3.1180e-08]], device='cuda:0')
target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
eval_loss= tensor(1.3133, device='cuda:0')
eval_acc1= tensor(0., device='cuda:0')
------Training-------
batch= 0
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5039, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 1
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 2
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 1.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.5039, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 3
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4195, device='cuda:0', grad_fn=<MeanBackward0>)
batch= 4
torch.Size([32, 16, 256])
torch.Size([32, 2])
torch.Size([32])
LabelSmoothingCrossEntropy()
confidence, nll_loss, self.smoothing, smooth_loss
0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133,
0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133,
1.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0',
grad_fn=<SqueezeBackward1>) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133,
0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0',
grad_fn=<NegBackward0>)
loss= tensor(0.4476, device='cuda:0', grad_fn=<MeanBackward0>)