Awesome

deeplabv3plus-gluon-mxnet

This repo is a guide to segmentation, using DeepLabV3+ as sample code.

deeplabv3plus_gluon

This repo aims to be a guide to semantic segmentation with deep learning using Gluon.

This repo attempts to reproduce "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation" as a Gluon reimplementation of DeepLabV3+, but the current version still differs from the paper.

I use DenseNet as the backbone instead of the DCNN layers (atrous convolution).

Requirements

mxnet 1.1.0

quick start

1. clone (or download) this repo
2. run deeplabv3+ (it trains on VOC_less, which contains just 7 images)
3. show the training result (you can change the code at line 351)

tutorials

Generate a dataloader for the segmentation model

In the segmentation task, we want to assign
each pixel in the image an object class.
There are 2 cases of the segmentation task:

1. binary case (foreground / background)
2. multi-class case (many objects in one image)

Class introduction

label_indices: for multiple classes, we formulate the segmentation task as a per-pixel classification problem and use a lookup table that maps each colormap color to its class index

read_images: read dataset from our folder

load_images: first we check which label format we expect (binary or multi-class), then we normalize the training data

normalize_image: img = img / 255

`__getitem__`: any data augmentation method, such as random crop or resize, can be applied in this function; this sample only uses resize

class SegDataset(gluon.data.Dataset):
    """Segmentation dataset read from ``root + 'data/'`` and ``root + 'label/'``.

    Two label formats are supported:

    * ``colormap is None``: labels are treated like images; they are
      scaled to [0, 1] and returned channel-first.
    * ``colormap`` given: labels are colour-coded images; every pixel is
      converted to the index of its colour in ``colormap``.

    All images are read into memory when the dataset is constructed.

    Parameters
    ----------
    root : str
        Dataset folder, including the trailing separator (for example
        ``'VOC_less/'``); ``'data/'`` and ``'label/'`` are appended to it.
    resize : sequence of two ints
        Passed to ``image.imresize`` as ``(w, h)``.
    colormap : list of [r, g, b], optional
        Colour of each class; the position in the list is the class index.
    classes : list of str, optional
        Class names; stored on the instance but not used by this class.
    """

    def __init__(self, root, resize, colormap=None, classes=None):
        self.root = root
        self.resize = resize
        self.colormap = colormap
        self.classes = classes
        # Lookup table from packed RGB value to class index; built lazily
        # on the first call to label_indices().
        self.colormap2label = None
        self.load_images()

    def label_indices(self, img):
        """Convert a colour-coded (H, W, 3) label image into class indices.

        Each RGB triple is packed into one integer
        ``(r * 256 + g) * 256 + b`` and looked up in a table with
        ``256 ** 3`` entries. Colours that are not in ``colormap`` map to 0.
        """
        if self.colormap2label is None:
            self.colormap2label = nd.zeros(256 ** 3)
            for i, cm in enumerate(self.colormap):
                self.colormap2label[(cm[0] * 256 + cm[1]) * 256 + cm[2]] = i
        data = img.astype('int32')
        idx = (data[:, :, 0] * 256 + data[:, :, 1]) * 256 + data[:, :, 2]
        return self.colormap2label[idx]

    def read_images(self, root):
        """Read every file in ``root + 'data/'`` and ``root + 'label/'``.

        Returns
        -------
        (data, label) : two lists of images of equal length.

        Raises
        ------
        ValueError
            If the two folders contain a different number of files.
        """
        dataroot = root + 'data/'  # left_frames   #data
        labelroot = root + 'label/'  # labels   #label
        # listdir() returns names in arbitrary order, so sort both
        # listings to keep image i paired with label i.
        # NOTE(review): this assumes image and label file names sort into
        # the same order (e.g. identical stems) -- confirm for your data.
        data_names = sorted(listdir(dataroot))
        label_names = sorted(listdir(labelroot))

        if len(data_names) != len(label_names):
            raise ValueError('number of your data is different from label')

        data = [image.imread(dataroot + name) for name in data_names]
        label = [image.imread(labelroot + name) for name in label_names]
        return data, label

    def load_images(self):
        """Load the dataset into ``self.data`` / ``self.label``.

        Images are always scaled to [0, 1]. Labels are scaled the same way
        only when no colormap is given; colour-coded labels keep their raw
        values because label_indices() needs the exact RGB values.
        """
        data, label = self.read_images(root=self.root)
        self.data = [self.normalize_image(im) for im in data]
        if self.colormap is None:
            self.label = [self.normalize_image(im) for im in label]
        else:
            self.label = label

        print('read ' + str(len(self.data)) + ' examples')

    def normalize_image(self, data):
        """Return ``data`` as float32 divided by 255."""
        return data.astype('float32') / 255

    def __getitem__(self, item):
        """Return the resized ``(data, label)`` pair at index ``item``.

        ``data`` is transposed to channel-first. ``label`` is a
        channel-first image when no colormap is set, otherwise a 2-D map
        of class indices.
        """
        width, height = self.resize[0], self.resize[1]
        data = image.imresize(self.data[item], width, height)
        data = data.transpose((2, 0, 1))

        if self.colormap is None:
            label = image.imresize(self.label[item], width, height)
            return data, label.transpose((2, 0, 1))

        # Colour-coded labels must be resized with nearest-neighbour
        # interpolation (interp=0): the default bilinear filter blends
        # colours at class boundaries into values that are not in the
        # colormap, which label_indices() would then map to the wrong class.
        label = image.imresize(self.label[item], width, height, interp=0)
        return data, self.label_indices(label)

    def __len__(self):
        """Return the number of loaded examples."""
        return len(self.data)

Dataloader

# RGB colour used for each class in the colour-coded label images.
# The position in this list is the class index and lines up with the
# name at the same position in `classes` below.
voc_colormap = [
    [0, 0, 0],
    [128, 0, 0],
    [0, 128, 0],
    [128, 128, 0],
    [0, 0, 128],
    [128, 0, 128],
    [0, 128, 128],
    [128, 128, 128],
    [64, 0, 0],
    [192, 0, 0],
    [64, 128, 0],
    [192, 128, 0],
    [64, 0, 128],
    [192, 0, 128],
    [64, 128, 128],
    [192, 128, 128],
    [0, 64, 0],
    [128, 64, 0],
    [0, 192, 0],
    [128, 192, 0],
    [0, 64, 128],
]

# Class names, listed in the same order as `voc_colormap`.
classes = [
    'background',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'potted plant',
    'sheep',
    'sofa',
    'train',
    'tv/monitor',
]
                    
def LoadDataset(dir, batchsize, output_shape, colormap=None, classes=None,
                shuffle=False):
    """Build a Gluon ``DataLoader`` over a ``SegDataset``.

    Parameters
    ----------
    dir : str
        Dataset folder, forwarded to ``SegDataset`` as ``root``.
    batchsize : int
        Number of samples per batch.
    output_shape : sequence of two ints
        Forwarded to ``SegDataset`` as ``resize``.
    colormap : list of [r, g, b], optional
        Pass it to select the colour-coded multi-class label format;
        leave it as ``None`` for the default label format.
    classes : list of str, optional
        Class names, forwarded to ``SegDataset``.
    shuffle : bool, optional
        Whether the loader shuffles the samples. Defaults to ``False``,
        which is the value that was previously hard-coded.

    Returns
    -------
    The ``gdata.DataLoader`` that iterates over the dataset in batches.
    """
    dataset = SegDataset(dir, output_shape, colormap, classes)
    return gdata.DataLoader(dataset, batchsize, shuffle=shuffle)

# Dataset folder; it is expected to contain 'data/' and 'label/' sub-folders.
dir = 'VOC_less/'
# Device context (first GPU).
ctx = mx.gpu()
batch_size = 3
# Target size handed to the dataset for resizing every image and label.
resize = (480, 320)
# Passing a colormap selects the multi-class label format; omit colormap
# and classes to fall back to the default (two-class) label format.
train_iter = LoadDataset(
    dir,
    batch_size,
    resize,
    colormap=voc_colormap,
    classes=classes,
)

network arch

The network follows this architecture but uses DenseNet instead of the DCNN layers

this is original network

this is my implemented network


class Deeplabv3(nn.HybridBlock):
    """Encoder-decoder segmentation network in the style of DeepLabV3+.

    Layout, as built in ``__init__``:

    * ``feature_extract``: ``stemblock(256)`` followed by
      ``DenseBlcok(6, growth_rate)``, batch norm and ReLU.
    * Encoder: five parallel stride-2 branches over the extracted
      features -- a 1x1 convolution, three 3x3 dilated convolutions
      (dilation 6, 12 and 18) and a 2x2 max pooling -- concatenated along
      the channel axis and fused by a 1x1 convolution.
    * Decoder: the fused encoder output is upsampled 2x by a transposed
      convolution, concatenated with a 1x1-convolved copy of the extracted
      features, refined by a 3x3 convolution and finally upsampled by two
      more stride-2 transposed convolutions to ``numofcls`` channels.

    Parameters
    ----------
    growth_rate : int
        Forwarded to ``DenseBlcok``.
    numofcls : int
        Number of output channels (one score map per class).
    """

    def __init__(self, growth_rate, numofcls):
        super(Deeplabv3, self).__init__()
        # Backbone feature extractor.
        self.feature_extract = nn.HybridSequential()
        with self.feature_extract.name_scope():
            self.feature_extract.add(
                stemblock(256),
                DenseBlcok(6, growth_rate),
                nn.BatchNorm(),
                nn.Activation('relu')
            )

        # Encoder branch 1: 1x1 convolution, stride 2.
        self.conv1 = nn.HybridSequential()
        with self.conv1.name_scope():
            self.conv1.add(
                nn.Conv2D(32, kernel_size=1, strides=2),
                nn.BatchNorm(),
                nn.Activation('relu')
            )

        # Encoder branches 2-4: 3x3 dilated convolutions. The padding
        # equals the dilation so all three produce the same output size.
        self.conv3r6 = nn.HybridSequential()
        with self.conv3r6.name_scope():
            self.conv3r6.add(
                nn.Conv2D(32, kernel_size=3, strides=2, padding=6, dilation=6),
                nn.BatchNorm(),
                nn.Activation('relu')
            )
        self.conv3r12 = nn.HybridSequential()
        with self.conv3r12.name_scope():
            self.conv3r12.add(
                nn.Conv2D(32, kernel_size=3, strides=2, padding=12, dilation=12),
                nn.BatchNorm(),
                nn.Activation('relu')
            )
        self.conv3r18 = nn.HybridSequential()
        with self.conv3r18.name_scope():
            self.conv3r18.add(
                nn.Conv2D(32, kernel_size=3, strides=2, padding=18, dilation=18),
                nn.BatchNorm(),
                nn.Activation('relu')
            )

        # Encoder branch 5: 2x2 max pooling, stride 2.
        # NOTE(review): for odd feature-map sizes this branch appears to
        # yield one fewer row/column than the convolution branches, which
        # would break the concat -- confirm inputs keep the sizes even.
        self.maxpool = nn.MaxPool2D(pool_size=2, strides=2)

        # 1x1 convolution that fuses the concatenated encoder branches.
        self.concatconv1 = nn.HybridSequential()
        with self.concatconv1.name_scope():
            self.concatconv1.add(
                nn.Conv2D(256, kernel_size=1),
                nn.BatchNorm(),
                nn.Activation('relu')
            )

        # 1x1 convolution on the backbone features for the decoder skip path.
        self.feconv1 = nn.HybridSequential()
        with self.feconv1.name_scope():
            self.feconv1.add(
                nn.Conv2D(256, kernel_size=1),
                nn.BatchNorm(),
                nn.Activation('relu')
            )

        # Transposed convolution (kernel 4, stride 2, padding 1) that
        # doubles the spatial size of the encoder output.
        self.transUp = nn.HybridSequential()
        with self.transUp.name_scope():
            self.transUp.add(
                nn.Conv2DTranspose(256, kernel_size=4, padding=1, strides=2),
                nn.BatchNorm(),
                nn.Activation('relu')
            )

        # 3x3 convolution applied after the skip connection is merged.
        self.decodeConv3 = nn.HybridSequential()
        with self.decodeConv3.name_scope():
            self.decodeConv3.add(
                nn.Conv2D(256, kernel_size=3, padding=1, strides=1),
                nn.BatchNorm(),
                nn.Activation('relu')
            )

        # Final 4x upsampling: two stride-2 transposed convolutions, the
        # last one producing `numofcls` channels.
        self.Up4 = nn.HybridSequential()
        with self.Up4.name_scope():
            self.Up4.add(
                nn.Conv2DTranspose(256, kernel_size=4, padding=1, strides=2),
                nn.BatchNorm(),
                nn.Activation('relu'),
                nn.Conv2DTranspose(numofcls, kernel_size=4, padding=1, strides=2)
            )

    def hybrid_forward(self, F, x):
        """Run the network on ``x`` and return the per-class score maps.

        ``F`` is the API namespace supplied by Gluon (``mxnet.ndarray`` or
        ``mxnet.symbol``). Every operator must be taken from ``F`` rather
        than from ``ndarray`` directly, otherwise the block fails once it
        is hybridized.
        """
        out = self.feature_extract(x)

        # Encoder: parallel stride-2 branches over the same features.
        conv1out = self.conv1(out)
        conv3r6out = self.conv3r6(out)
        conv3r12out = self.conv3r12(out)
        conv3r18out = self.conv3r18(out)
        maxpoolout = self.maxpool(out)

        second_out = F.concat(conv1out, conv3r6out, conv3r12out,
                              conv3r18out, maxpoolout, dim=1)
        encoder_out = self.concatconv1(second_out)

        # Decoder: upsample the encoder output and merge it with the
        # 1x1-convolved backbone features along the channel axis.
        encoderUp = self.transUp(encoder_out)
        feconv1out = self.feconv1(out)

        combine_out = F.concat(encoderUp, feconv1out, dim=1)
        output = self.decodeConv3(combine_out)
        output = self.Up4(output)

        return output

result

Note

Training the current model on VOC is ongoing. I will keep working on the Xception model and atrous convolution, and fine-tune on benchmark datasets.

This repo aims to help people learn more about Gluon. If you want a complete reproduction, follow this repo: https://github.com/duducheng/deeplabv3p_gluon