
Commit 1e49822

refactor(structure): add the solver along with the run script
refactor the script into a solver to keep everything more organized
1 parent fee201d commit 1e49822
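
In outline, the refactor replaces module-level script code with a Solver class driven by a small main(). A condensed sketch of the new shape of main.py (the full listing is in the diff below; bodies elided with ...):

    def main():
        args = ...            # argparse: --lr, --epoch, --trainBatchSize, --testBatchSize, --cuda
        Solver(args).run()

    class Solver(object):
        def load_data(self): ...    # CIFAR-10 train/test DataLoaders
        def load_model(self): ...   # WideResNet + Adam + MultiStepLR + CrossEntropyLoss
        def train(self): ...        # one pass over train_loader
        def test(self): ...         # evaluation under torch.no_grad()
        def save(self): ...         # torch.save(self.model, "model.pth")
        def run(self): ...          # load_data, load_model, epoch loop, save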

File tree: main.py · misc.py · models/WideResNet.py

3 files changed: +147 −172 lines changed

main.py

+138 −162
@@ -11,166 +11,142 @@
 from misc import progress_bar
 
 
-# ===========================================================
-# Global variables
-# ===========================================================
-EPOCH = 200  # number of times for each run-through
-BATCH_SIZE = 100  # number of images for each epoch
-ACCURACY = 0  # overall prediction accuracy
-GPU_IN_USE = torch.cuda.is_available()  # whether using GPU
-CLASSES = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')  # the 10 classes contained in the CIFAR-10 dataset
-
-
-# ===========================================================
-# parser initialization
-# ===========================================================
-parser = argparse.ArgumentParser(description="cifar-10 with PyTorch")
-parser.add_argument('--lr', default=0.001, type=float, help='learning rate')
-parser.add_argument('--epoch', default=EPOCH, type=int, help='number of epochs to train for')
-parser.add_argument('--trainBatchSize', default=BATCH_SIZE, type=int, help='training batch size')
-parser.add_argument('--testBatchSize', default=BATCH_SIZE, type=int, help='testing batch size')
-args = parser.parse_args()
-
-train_transform = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.ToTensor()])  # dataset training transform
-test_transform = transforms.Compose([transforms.ToTensor()])  # dataset testing transform
-
-
-# ===========================================================
-# Prepare train dataset & test dataset
-# ===========================================================
-print("***** prepare data ******")
-train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
-train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.trainBatchSize, shuffle=True)
-
-test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
-test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.testBatchSize, shuffle=False)
-print("data preparation......Finished")
-
-# ===========================================================
-# Prepare model
-# ===========================================================
-if GPU_IN_USE:
-    device = torch.device('cuda')
-    cudnn.benchmark = True
-else:
-    device = torch.device('cpu')
-
-print("\n***** prepare model *****")
-# Net = LeNet().to(device)
-
-# Net = AlexNet().to(device)
-
-# Net = VGG11().to(device)
-# Net = VGG13().to(device)
-# Net = VGG16().to(device)
-# Net = VGG19().to(device)
-
-# Net = GoogLeNet().to(device)
-
-# Net = resnet18().to(device)
-# Net = resnet34().to(device)
-# Net = resnet50().to(device)
-# Net = resnet101().to(device)
-# Net = resnet152().to(device)
-
-# Net = DenseNet121().to(device)
-# Net = DenseNet161().to(device)
-# Net = DenseNet169().to(device)
-# Net = DenseNet201().to(device)
-
-Net = WideResNet(depth=28, num_classes=10).to(device)
-
-optimizer = optim.Adam(Net.parameters(), lr=args.lr)  # Adam optimization
-scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)  # lr decay
-loss_function = nn.CrossEntropyLoss()
-print("model preparation......Finished")
-
-
-# Train
-# ===========================================================
-# data: [torch.cuda.FloatTensor of size 100x3x32x32 (GPU 0)]
-# target: [torch.cuda.LongTensor of size 100 (GPU 0)]
-# output: [torch.cuda.FloatTensor of size 100x10 (GPU 0)]
-# prediction: [[torch.cuda.LongTensor of size 100 (GPU 0)],
-#              [torch.cuda.LongTensor of size 100 (GPU 0)]]
-# ===========================================================
-def train():
-    print("train:")
-    Net.train()
-    train_loss = 0
-    train_correct = 0
-    total = 0
-
-    for batch_num, (data, target) in enumerate(train_loader):
-        data, target = data.to(device), target.to(device)
-        optimizer.zero_grad()
-        output = Net(data)
-        loss = loss_function(output, target)
-        loss.backward()
-        optimizer.step()
-        train_loss += loss.item()
-        prediction = torch.max(output, 1)  # second param "1" represents the dimension to be reduced
-        total += target.size(0)
-
-        # train_correct incremented by one if predicted right
-        train_correct += np.sum(prediction[1].cpu().numpy() == target.cpu().numpy())
-
-        progress_bar(batch_num, len(train_loader), 'Loss: %.4f | Acc: %.3f%% (%d/%d)'
-                     % (train_loss / (batch_num + 1), 100. * train_correct / total, train_correct, total))
-
-    return train_loss, train_correct / total
-
-
-# test
-# ===========================================================
-# data: [torch.cuda.FloatTensor of size 100x3x32x32 (GPU 0)]
-# target: [torch.cuda.LongTensor of size 100 (GPU 0)]
-# output: [torch.cuda.FloatTensor of size 100x10 (GPU 0)]
-# prediction: [[torch.cuda.LongTensor of size 100 (GPU 0)],
-#              [torch.cuda.LongTensor of size 100 (GPU 0)]]
-# ===========================================================
-def test():
-    print("test:")
-    Net.eval()
-    test_loss = 0
-    test_correct = 0
-    total = 0
-
-    with torch.no_grad():
-        for batch_num, (data, target) in enumerate(test_loader):
-            data, target = data.to(device), target.to(device)
-            output = Net(data)
-            loss = loss_function(output, target)
-            test_loss += loss.item()
-            prediction = torch.max(output, 1)
+CLASSES = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
+
+
+def main():
+    parser = argparse.ArgumentParser(description="cifar-10 with PyTorch")
+    parser.add_argument('--lr', default=0.001, type=float, help='learning rate')
+    parser.add_argument('--epoch', default=200, type=int, help='number of epochs to train for')
+    parser.add_argument('--trainBatchSize', default=100, type=int, help='training batch size')
+    parser.add_argument('--testBatchSize', default=100, type=int, help='testing batch size')
+    parser.add_argument('--cuda', default=torch.cuda.is_available(), type=bool, help='whether cuda is in use')
+    args = parser.parse_args()
+
+    solver = Solver(args)
+    solver.run()
+
+
+class Solver(object):
+    def __init__(self, config):
+        self.model = None
+        self.lr = config.lr
+        self.epochs = config.epoch
+        self.train_batch_size = config.trainBatchSize
+        self.test_batch_size = config.testBatchSize
+        self.criterion = None
+        self.optimizer = None
+        self.scheduler = None
+        self.device = None
+        self.cuda = config.cuda
+        self.train_loader = None
+        self.test_loader = None
+
+    def load_data(self):
+        train_transform = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.ToTensor()])
+        train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
+        self.train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=self.train_batch_size, shuffle=True)
+        test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
+        self.test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=self.test_batch_size, shuffle=False)
+
+    def load_model(self):
+        if self.cuda:
+            self.device = torch.device('cuda')
+            cudnn.benchmark = True
+        else:
+            self.device = torch.device('cpu')
+
+        # self.model = LeNet().to(self.device)
+        # self.model = AlexNet().to(self.device)
+        # self.model = VGG11().to(self.device)
+        # self.model = VGG13().to(self.device)
+        # self.model = VGG16().to(self.device)
+        # self.model = VGG19().to(self.device)
+        # self.model = GoogLeNet().to(self.device)
+        # self.model = resnet18().to(self.device)
+        # self.model = resnet34().to(self.device)
+        # self.model = resnet50().to(self.device)
+        # self.model = resnet101().to(self.device)
+        # self.model = resnet152().to(self.device)
+        # self.model = DenseNet121().to(self.device)
+        # self.model = DenseNet161().to(self.device)
+        # self.model = DenseNet169().to(self.device)
+        # self.model = DenseNet201().to(self.device)
+        self.model = WideResNet(depth=28, num_classes=10).to(self.device)
+
+        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
+        self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[75, 150], gamma=0.5)
+        self.criterion = nn.CrossEntropyLoss().to(self.device)
+
+    def train(self):
+        print("train:")
+        self.model.train()
+        train_loss = 0
+        train_correct = 0
+        total = 0
+
+        for batch_num, (data, target) in enumerate(self.train_loader):
+            data, target = data.to(self.device), target.to(self.device)
+            self.optimizer.zero_grad()
+            output = self.model(data)
+            loss = self.criterion(output, target)
+            loss.backward()
+            self.optimizer.step()
+            train_loss += loss.item()
+            prediction = torch.max(output, 1)  # second param "1" represents the dimension to be reduced
             total += target.size(0)
-            test_correct += np.sum(prediction[1].cpu().numpy() == target.cpu().numpy())
-
-            progress_bar(batch_num, len(test_loader), 'Loss: %.4f | Acc: %.3f%% (%d/%d)'
-                         % (test_loss / (batch_num + 1), 100. * test_correct / total, test_correct, total))
-
-    return test_loss, test_correct / total
-
-
-# ===========================================================
-# Save model
-# ===========================================================
-def save():
-    model_out_path = "model.pth"
-    torch.save(Net, model_out_path)
-    print("Checkpoint saved to {}".format(model_out_path))
-
-
-# ===========================================================
-# training and save model
-# ===========================================================
-for epoch in range(1, args.epoch + 1):
-    scheduler.step(epoch)
-    print("\n===> epoch: %d/200" % epoch)
-    train_result = train()
-    print(train_result)
-    test_result = test()
-    ACCURACY = max(ACCURACY, test_result[1])
-    if epoch == args.epoch:
-        print("===> BEST ACC. PERFORMANCE: %.3f%%" % (ACCURACY * 100))
-        save()
+
+            # train_correct incremented by one if predicted right
+            train_correct += np.sum(prediction[1].cpu().numpy() == target.cpu().numpy())
+
+            progress_bar(batch_num, len(self.train_loader), 'Loss: %.4f | Acc: %.3f%% (%d/%d)'
+                         % (train_loss / (batch_num + 1), 100. * train_correct / total, train_correct, total))
+
+        return train_loss, train_correct / total
+
+    def test(self):
+        print("test:")
+        self.model.eval()
+        test_loss = 0
+        test_correct = 0
+        total = 0
+
+        with torch.no_grad():
+            for batch_num, (data, target) in enumerate(self.test_loader):
+                data, target = data.to(self.device), target.to(self.device)
+                output = self.model(data)
+                loss = self.criterion(output, target)
+                test_loss += loss.item()
+                prediction = torch.max(output, 1)
+                total += target.size(0)
+                test_correct += np.sum(prediction[1].cpu().numpy() == target.cpu().numpy())
+
+                progress_bar(batch_num, len(self.test_loader), 'Loss: %.4f | Acc: %.3f%% (%d/%d)'
+                             % (test_loss / (batch_num + 1), 100. * test_correct / total, test_correct, total))
+
+        return test_loss, test_correct / total
+
+    def save(self):
+        model_out_path = "model.pth"
+        torch.save(self.model, model_out_path)
+        print("Checkpoint saved to {}".format(model_out_path))
+
+    def run(self):
+        self.load_data()
+        self.load_model()
+        accuracy = 0
+        for epoch in range(1, self.epochs + 1):
+            self.scheduler.step(epoch)
+            print("\n===> epoch: %d/200" % epoch)
+            train_result = self.train()
+            print(train_result)
+            test_result = self.test()
+            accuracy = max(accuracy, test_result[1])
+            if epoch == self.epochs:
+                print("===> BEST ACC. PERFORMANCE: %.3f%%" % (accuracy * 100))
+                self.save()
+
+
+if __name__ == '__main__':
+    main()
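
With this refactor the trainer can also be exercised without the CLI. A minimal sketch, assuming main.py sits on the import path; SimpleNamespace merely mimics the parsed-args object, and the one-epoch settings are illustrative, not part of the commit:

    # CLI, as before:  python main.py --lr 0.001 --epoch 200
    from types import SimpleNamespace
    from main import Solver  # safe to import: execution is guarded by __name__ == '__main__'

    config = SimpleNamespace(lr=0.001, epoch=1, trainBatchSize=100,
                             testBatchSize=100, cuda=False)  # CPU smoke test
    Solver(config).run()  # load_data -> load_model -> train/test loop -> save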

misc.py

−1

@@ -45,7 +45,6 @@ def progress_bar(current, total, msg=None):
     sys.stdout.flush()
 
 
-# return the formatted time
 def format_time(seconds):
     days = int(seconds / 3600/24)
     seconds = seconds - days*3600*24

models/WideResNet.py

+9 −9

@@ -52,24 +52,24 @@ def forward(self, x):
 class WideResNet(nn.Module):
     def __init__(self, depth, num_classes, widen_factor=1, drop_rate=0.0):
         super(WideResNet, self).__init__()
-        nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor]
+        n_channels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor]
         assert ((depth - 4) % 6 == 0)
-        n = (depth - 4) / 6
+        n = int((depth - 4) / 6)
         block = BasicBlock
         # 1st conv before any network block
-        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
+        self.conv1 = nn.Conv2d(3, n_channels[0], kernel_size=3, stride=1,
                                padding=1, bias=False)
         # 1st block
-        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, drop_rate)
+        self.block1 = NetworkBlock(n, n_channels[0], n_channels[1], block, 1, drop_rate)
         # 2nd block
-        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, drop_rate)
+        self.block2 = NetworkBlock(n, n_channels[1], n_channels[2], block, 2, drop_rate)
         # 3rd block
-        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, drop_rate)
+        self.block3 = NetworkBlock(n, n_channels[2], n_channels[3], block, 2, drop_rate)
         # global average pooling and classifier
-        self.bn1 = nn.BatchNorm2d(nChannels[3])
+        self.bn1 = nn.BatchNorm2d(n_channels[3])
         self.relu = nn.ReLU(inplace=True)
-        self.fc = nn.Linear(nChannels[3], num_classes)
-        self.nChannels = nChannels[3]
+        self.fc = nn.Linear(n_channels[3], num_classes)
+        self.nChannels = n_channels[3]
 
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
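
The int() cast on n is the substantive fix in this file: under Python 3, / is true division, so (depth - 4) / 6 is a float, and a float block count fails wherever NetworkBlock repeats its BasicBlocks (a range(n)-style loop is assumed here; that code is not shown in this diff). A quick illustration:

    depth = 28
    assert (depth - 4) % 6 == 0
    n = (depth - 4) / 6       # 4.0 -- a float under Python 3 true division
    # range(n)                # TypeError: 'float' object cannot be interpreted as an integer
    n = int((depth - 4) / 6)  # 4 -- the commit's fix; n = (depth - 4) // 6 works equally well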
