```python
path = Path("../data")
assert path.is_dir()
```
Transforms
The fastai data block API cannot directly give us the (input, target) tuples our deep learning model needs. That's why we create custom `Transform`s to fix this and still leverage the high-level framework.
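As a quick refresher before defining our own: a fastai `Transform` pairs an `encodes` method (run when the pipeline goes forward) with an optional `decodes` method (run to reverse the encoding, e.g. for display). A minimal sketch with a toy, hypothetical `AddOne` transform:

```python
from fastcore.transform import Transform

class AddOne(Transform):
    "Toy transform: `encodes` runs forward, `decodes` reverses it."
    def encodes(self, x): return x + 1
    def decodes(self, x): return x - 1

tfm = AddOne()
assert tfm(1) == 2         # forward: calls encodes
assert tfm.decode(2) == 1  # reverse: calls decodes
```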
```python
train_pivot = get_train_df(path, only_faulty=True, pivot=True)
print("Train pivot shape: ", train_pivot.shape)
train_pivot.head(n=3)
```
```
Train pivot shape: (6666, 6)
```
ImageId | 1 | 2 | 3 | 4 | n | ClassIds |
---|---|---|---|---|---|---|
0002cc93b.jpg | 29102 12 29346 24 29602 24 29858 24 30114 24 30370 24 30626 24 30882 24 31139 23 31395 23 31651 23 31907 23 32163 23 32419 23 32675 23 77918 27 78174 55 78429 60 78685 64 78941 68 79197 72 79452 77 79708 81 79964 85 80220 89 80475 94 80731 98 80987 102 81242 105 81498 105 81754 104 82010 104 82265 105 82521 31 82556 69 82779 27 82818 63 83038 22 83080 57 83297 17 83342 50 83555 13 83604 44 83814 8 83866 37 84073 3 84128 31 84390 25 84652 18 84918 8 85239 10 85476 29 85714 47 85960 57 86216 57 86471 58 86727 58 86983 58 87238 59 87494 59 87750 59 88005 60 88261 60 88517 60 88772 61 89028 53... | NaN | NaN | NaN | 1 | 1 |
0007a71bf.jpg | NaN | NaN | 18661 28 18863 82 19091 110 19347 110 19603 110 19859 110 20115 110 20371 110 20627 110 20883 110 21139 110 21395 110 21651 110 21962 55 293125 251 293381 251 293637 251 293893 251 294149 251 294405 251 294661 251 294917 251 295173 251 295429 251 295685 251 295941 251 296197 251 296453 251 296709 251 296965 251 297221 251 297477 251 297733 251 297989 251 298245 251 298564 188 298945 63 | NaN | 1 | 3 |
000a4bcdd.jpg | 37607 3 37858 8 38108 14 38359 20 38610 25 38863 28 39119 28 39375 29 39631 29 39887 29 40143 29 40399 29 40655 30 40911 30 41167 30 41423 30 41679 31 41935 31 42191 31 42447 31 42703 31 42960 31 43216 31 43472 31 43728 31 43984 31 44240 32 44496 32 44752 32 45008 32 45264 33 45520 33 45776 33 46032 33 46288 33 46544 34 46803 31 47065 25 47327 19 47588 15 47850 9 48112 3 62667 12 62923 23 63179 23 63348 3 63435 23 63604 7 63691 23 63860 11 63947 23 64116 15 64203 23 64372 19 64459 23 64628 24 64715 23 64884 28 64971 23 65139 33 65227 23 65395 37 65483 23 65651 41 65740 22 65907 45 65996 22... | NaN | NaN | NaN | 1 | 1 |
```python
train_example = train_pivot.sample().iloc[0]
train_example.name
```
```
'fdab86eb7.jpg'
```
The Data pipeline: SteelMask Block
Get_x transform
ReadImagePathFromIndex
ReadImagePathFromIndex (pref)
Read the image name from `train_pivot` and return the image path.
With the `ReadImagePathFromIndex` transform we get the first piece of the training example: the image path of `train_example` from `train_pivot`. When constructing the transform we specify the prefix (`pref`) to prepend to the `ImageId` of the training image.
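The implementation ships with the library; a minimal sketch of the idea (assuming the `ImageId` is stored in the row's `.name`, as the pandas rows above suggest) might look like:

```python
from pathlib import Path

class ReadImagePathFromIndex(Transform):
    "Sketch: join a path prefix with the `ImageId` stored in the row index."
    def __init__(self, pref): self.pref = Path(pref)
    def encodes(self, row): return str(self.pref/row.name)
```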
```python
x_tfm = ReadImagePathFromIndex(pref=(path/"train_images"))
x = x_tfm(train_example)
test_eq(x, str(path/"train_images"/train_example.name))
```
Get_y transform
ReadRLEs
ReadRLEs (cols=[1, 2, 3, 4])
Read RLEs from `train_pivot` and return a list of RLEs.
With the `ReadRLEs` transform we get our labels: the list of RLEs, one per `ClassId`. When constructing the transform we specify the `ClassId` column names. Given `train_example` as input, we get a list of strings as output.
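A sketch of the underlying logic, mirroring the test below (a missing RLE becomes an empty string; the library's actual implementation may differ):

```python
class ReadRLEs(Transform):
    "Sketch: collect the RLE string of each class column, '' when missing."
    def __init__(self, cols=[1, 2, 3, 4]): self.cols = cols
    def encodes(self, row):
        return [row[c] if isinstance(row[c], str) else '' for c in self.cols]
```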
```python
cols = [1, 2, 3, 4]
y_tfm = ReadRLEs(cols=cols)
rles = y_tfm(train_example)
test_eq(len(rles), 4)
test_eq(rles, [train_example[i] if train_example[i] is not np.nan else '' for i in cols])
```
Get Mask from RLEs
MakeMask
MakeMask (flatten=True)
Read a list of RLEs and return an `np.array` of the mask.
The `MakeMask` transform takes a list of RLEs and returns a mask with shape `(256, 1600)` if `flatten` is `True` (the default). If `flatten` is `False` it returns a `(256, 1600, 4)` array, one channel per defect class.
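For context, the competition RLEs are 1-indexed `start length` pairs over the column-major (top-to-bottom) flattening of the image. A standalone decoding sketch, independent of the library's own implementation:

```python
import numpy as np

def rle_decode(rle: str, shape=(256, 1600)) -> np.ndarray:
    "Decode a 'start length' RLE string into a binary mask of `shape`."
    mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    if rle:
        nums = np.array(rle.split(), dtype=int)
        starts, lengths = nums[0::2] - 1, nums[1::2]  # pixels are 1-indexed
        for start, length in zip(starts, lengths):
            mask[start:start + length] = 1
    # Competition RLEs run top-to-bottom, so reshape column-major
    return mask.reshape(shape, order='F')
```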
```python
mask_tfm = MakeMask(flatten=False)
mask = mask_tfm(rles)
test_eq(mask.shape, (256, 1600, 4))

# Default transform with flatten mask for PILMask.create
flatten_mask_tfm = MakeMask()
flatten_mask = flatten_mask_tfm(rles)
test_eq(flatten_mask.shape, (256, 1600))
```
```python
plt.figure(figsize=(15, 5))
plt.imshow(flatten_mask)
plt.xticks([])
plt.yticks([]);
```
```python
rle = mask_tfm.decode(mask)
test_eq(rle, rles)

flatten_rle = flatten_mask_tfm.decode(flatten_mask)
test_eq(flatten_rle, rles)
```
Finally, a `Datasets` object can be built from the two `Pipeline`s created with the previous transforms.
```python
x_tfms = Pipeline([x_tfm, PILImage.create])
y_tfms = Pipeline([y_tfm, flatten_mask_tfm, PILMask.create])
dsets = Datasets(train_pivot, [x_tfms, y_tfms])
elem = dsets.train[1]
image, mask = elem
type(elem), image, mask
```
```
(tuple, PILImage mode=RGB size=1600x256, PILMask mode=F size=1600x256)
```
```python
_, axs = plt.subplots(1, 3, figsize=(20, 5))
image.show(ctx=axs[0], title='image')
mask.show(alpha=1, ctx=axs[1], vmin=1, vmax=30, title='mask')
image.show(ctx=axs[2], title='superimposed')
mask.show(ctx=axs[2], vmin=1, vmax=30);
```
4-channel Mask
ChannelMask
ChannelMask (enc=None, dec=None, split_idx=None, order=None)
Transform (x,y) tensor masks from [w, h] to [channels, w, h]
The `ChannelMask` transform changes the shape of the mask from a flattened `(256, 1600)` to `(4, 256, 1600)`, one binary channel per defect class.
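At its core this is a one-hot expansion of the class-coded mask. A plain-PyTorch sketch, assuming (consistently with the four `ClassId` columns) that pixel values 1-4 map to channels 0-3:

```python
import torch

def to_channels(mask: torch.Tensor, n_classes: int = 4) -> torch.Tensor:
    "Expand a [H, W] class-coded mask into a [n_classes, H, W] binary mask."
    return torch.stack([(mask == c + 1).float() for c in range(n_classes)])

def from_channels(ch_mask: torch.Tensor) -> torch.Tensor:
    "Collapse [n_classes, H, W] back into a flat [H, W] class-coded mask."
    classes = ch_mask.argmax(dim=0) + 1        # best class per pixel (1..n)
    return classes * (ch_mask.sum(dim=0) > 0)  # zero out background pixels
```

For non-overlapping classes the two functions are inverses of each other, which is what the decode tests below verify.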
```python
tens = ToTensor()
timg, tmask = tens(elem)
timg.shape, tmask.shape, tmask.dim()
```
```
(torch.Size([3, 256, 1600]), torch.Size([256, 1600]), 2)
```
```python
tfm = ChannelMask()
ch_mask = tfm(tmask)
ch_mask.shape, ch_mask.dim()
```
```
(torch.Size([4, 256, 1600]), 3)
```
```python
decoded_mask = tfm.decodes(ch_mask)
decoded_mask.shape
```
```
torch.Size([256, 1600])
```
```python
test_close(decoded_mask, tmask)
show_images((decoded_mask, tmask), figsize=(15, 5));
```
It works with batches:
```python
bs_tmask = tmask.unsqueeze(0).expand(6, -1, -1)
tfm = ChannelMask()
bs_ch_mask = tfm(bs_tmask)
bs_ch_mask.shape, bs_ch_mask.dim()
```
```
(torch.Size([6, 4, 256, 1600]), 4)
```
```python
decoded_bs_mask = tfm.decodes(bs_ch_mask)
decoded_bs_mask.shape
```
```
torch.Size([6, 256, 1600])
```
```python
for ch, tmp_mask in enumerate(bs_ch_mask):
    test_close(decoded_bs_mask[ch, ...], bs_tmask[ch, ...])
```
Albumentation transforms
```python
img, mask = elem
img, mask = np.array(img), np.array(mask)
img.shape, mask.shape
```
```
((256, 1600, 3), (256, 1600))
```
Some augmentations from the `albumentations` library:
```python
import cv2

def show_aug(aug, img, mask):
    aug_elem = aug(image=img, mask=mask)
    aug_crop_img = aug_elem["image"]
    aug_crop_mask = aug_elem["mask"]
    print(aug_crop_img.shape, aug_crop_mask.shape)
    print(f"Unique elems in mask: {np.unique(aug_crop_mask)}")
    show_images((aug_crop_img, aug_crop_mask), figsize=(10, 20))
    return aug_crop_img, aug_crop_mask
```
```python
aug = alb.CropNonEmptyMaskIfExists(256, 400, p=1., ignore_values=[0])
aug_crop_img, aug_crop_mask = show_aug(aug, img, mask)
```
```
(256, 400, 3) (256, 400)
Unique elems in mask: [0. 3.]
```
```python
aug = alb.VerticalFlip(p=1.)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
```
```
(256, 400, 3) (256, 400)
Unique elems in mask: [0. 3.]
```
```python
aug = alb.HorizontalFlip(p=1.)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
```
```
(256, 400, 3) (256, 400)
Unique elems in mask: [0. 3.]
```
```python
aug = alb.ElasticTransform(p=1., alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
```
```
(256, 400, 3) (256, 400)
Unique elems in mask: [0. 3.]
```
```python
aug = alb.GridDistortion(p=1.)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
```
```
(256, 400, 3) (256, 400)
Unique elems in mask: [0. 3.]
```
```python
aug = alb.OpticalDistortion(distort_limit=0.5, shift_limit=0.05, p=1., border_mode=cv2.BORDER_REPLICATE)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
```
```
(256, 400, 3) (256, 400)
Unique elems in mask: [0. 3.]
```
Everything is wrapped up in `get_train_aug` and `get_valid_aug` for the training and validation augmentations. `AlbumentationsTransform` then combines the two into a single Transform for the DataBlock, applying the appropriate pipeline depending on the split.
AlbumentationsTransform
AlbumentationsTransform (train_aug, valid_aug)
A transform handler for multiple Albumentation
transforms
get_valid_aug
get_valid_aug (height, width)
get_train_aug
get_train_aug (height, width)
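The key mechanism is fastai's `split_idx` convention: `0` marks the training split and `1` the validation split (note the explicit `split_idx=0` in the call further below). A sketch of how such a handler could dispatch on it; the library's actual class may differ in details:

```python
class AlbumentationsTransform(ItemTransform):
    "Sketch: apply `train_aug` on the training split, `valid_aug` otherwise."
    split_idx = None  # run on every split; we choose the pipeline ourselves
    def __init__(self, train_aug, valid_aug):
        self.train_aug, self.valid_aug = train_aug, valid_aug
    def __call__(self, x, split_idx=0, **kwargs):
        # fastai passes split_idx=0 for training, 1 for validation
        self.aug = self.train_aug if split_idx == 0 else self.valid_aug
        return super().__call__(x, split_idx=split_idx, **kwargs)
    def encodes(self, x):
        img, mask = x
        res = self.aug(image=np.array(img), mask=np.array(mask))
        return PILImage.create(res["image"]), PILMask.create(res["mask"])
```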
```python
train_aug = get_train_aug(256, 400)
aug_img, aug_mask = show_aug(train_aug, aug_crop_img, aug_crop_mask)
```
```
(256, 400, 3) (256, 400)
Unique elems in mask: [0. 3.]
```
```python
valid_aug = get_valid_aug(256, 400)
aug_img, aug_mask = show_aug(valid_aug, aug_crop_img, aug_crop_mask)
```
```
(256, 400, 3) (256, 400)
Unique elems in mask: [0. 3.]
```
```python
alb_tfm = AlbumentationsTransform(train_aug, valid_aug)
alb_aug_elem = alb_tfm(elem, split_idx=0)
show_images(alb_aug_elem, figsize=(15, 10))
```
SteelMaskBlock
SteelMaskBlock
SteelMaskBlock (train_aug, valid_aug)
SteelDataBlock
SteelDataBlock
SteelDataBlock (path, splitter=None, train_aug=None, valid_aug=None, *args, **kwargs)
Get the DataBlock for Severstal Dataset.
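From the signatures above, the block plausibly wires the custom get_x/get_y pipelines and the mask block into a standard fastai `DataBlock`. A sketch under those assumptions; the default augmentation sizes are guesses based on the batch shapes observed below:

```python
def SteelMaskBlock(train_aug, valid_aug):
    "Sketch: a TransformBlock pairing mask creation with the augmentations."
    return TransformBlock(
        type_tfms=[ReadRLEs(cols=[1, 2, 3, 4]), MakeMask(), PILMask.create],
        item_tfms=[AlbumentationsTransform(train_aug, valid_aug)],
        batch_tfms=[ChannelMask()])

def SteelDataBlock(path, splitter=None, train_aug=None, valid_aug=None, **kwargs):
    "Sketch: DataBlock built from the image transforms and SteelMaskBlock."
    if train_aug is None: train_aug = get_train_aug(224, 1568)  # guessed defaults
    if valid_aug is None: valid_aug = get_valid_aug(224, 1568)
    if splitter is None: splitter = TrainTestSplitter(0.15)
    image_block = TransformBlock(type_tfms=[
        ReadImagePathFromIndex(pref=path/"train_images"), PILImage.create])
    return DataBlock(blocks=(image_block, SteelMaskBlock(train_aug, valid_aug)),
                     splitter=splitter, **kwargs)
```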
```python
splitter = TrainTestSplitter(0.15)
block = SteelDataBlock(path, splitter)
dls = block.dataloaders(
    source=train_pivot,
    bs=16,
    num_workers=0
)
xb, yb = dls.one_batch()
xb.shape, yb.shape
```
```
C:\Users\beanTech\miniconda3\envs\steel_segmentation\lib\site-packages\torch\_tensor.py:575: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at ..\aten\src\ATen\native\BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)

(torch.Size([16, 3, 224, 1568]), torch.Size([16, 4, 224, 1568]))
```
K-Folds strategy
We use the scikit-learn `StratifiedKFold` class for this dataset, stratifying on each image's `ClassIds` combination.
```python
nsplits = 2
# df = train_pivot[[1,2,3,4]].stack().to_frame().reset_index()
df = train_pivot.reset_index()
X = df["ImageId"].to_numpy()
y = df["ClassIds"].to_numpy()
X.shape, y.shape
```
```
((6578,), (6578,))
```
```python
skf = StratifiedKFold(n_splits=nsplits, shuffle=True)
dsets = {i: _ for i in range(nsplits)}
for i, (train_index, valid_index) in enumerate(skf.split(X, y)):
    print(
        f"{i}-fold:",
        f"Train: #{len(train_index)}, e.g. {train_index[:5]}",
        f"Valid: #{len(valid_index)}, e.g. {valid_index[:5]}",
        sep='\n', end='\n\n')
```
```
0-fold:
Train: #3289, e.g. [ 1  6 12 13 15]
Valid: #3289, e.g. [0 2 3 4 5]

1-fold:
Train: #3289, e.g. [0 2 3 4 5]
Valid: #3289, e.g. [ 1  6 12 13 15]
```
get_kfold_splits
get_kfold_splits (df_pivot, nsplits=2)
```python
splits = get_kfold_splits(train_pivot, nsplits=2)
```
```
0-fold:
Train: #3289, e.g. [ 0  2  5  6 12]
Valid: #3289, e.g. [1 3 4 7 8]

1-fold:
Train: #3289, e.g. [1 3 4 7 8]
Valid: #3289, e.g. [ 0  2  5  6 12]
```
KFoldSplitter
KFoldSplitter (splits, idx)
```python
splits
```
```
[[(#3289) [0,2,5,6,12,17,18,19,23,26...],
  (#3289) [1,3,4,7,8,9,10,11,13,14...]],
 [(#3289) [1,3,4,7,8,9,10,11,13,14...],
  (#3289) [0,2,5,6,12,17,18,19,23,26...]]]
```
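Given the signature above, `KFoldSplitter` plausibly wraps the precomputed index lists of fold `idx` in a fastai-style splitter callable; a sketch of that assumption:

```python
def KFoldSplitter(splits, idx):
    "Sketch: expose precomputed fold `idx` as a fastai splitter callable."
    def _inner(items):
        train_idxs, valid_idxs = splits[idx]
        return train_idxs, valid_idxs
    return _inner
```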
SteelDataLoaders
SteelDataLoaders
SteelDataLoaders (block, source, bs, *args, **kwargs)
Get the DataLoaders for Severstal Dataset.
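Judging by the signature, this is plausibly a thin factory over `block.dataloaders`; a sketch under that assumption:

```python
def SteelDataLoaders(block, source, bs, size=None, **kwargs):
    "Sketch: build the DataLoaders from a SteelDataBlock and a source DataFrame."
    # `size` presumably re-parameterizes the crop/resize augmentations in the
    # real implementation; this sketch accepts it but does not use it.
    return block.dataloaders(source=source, bs=bs, **kwargs)
```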
```python
dls = SteelDataLoaders(block, train_pivot, bs=16, size=(256, 400))
xb, yb = dls.one_batch()
xb.shape, yb.shape
```
```
(torch.Size([16, 3, 224, 1568]), torch.Size([16, 4, 224, 1568]))
```
```python
for idx, split in enumerate(splits):
    block = SteelDataBlock(path, splitter=KFoldSplitter(splits, idx))
    dls = SteelDataLoaders(block, train_pivot, bs=16, size=(256, 400))
    print(f"{idx} - Train items:\n{dls.train.items.ClassIds.value_counts()}")
    print(f"{idx} - Valid items:\n{dls.valid.items.ClassIds.value_counts()}")
```
```
0 - Train items:
3        2346
1         381
4         254
3 4       140
2          97
1 3        45
1 2        18
2 3         7
1 2 3       1
Name: ClassIds, dtype: int64
0 - Valid items:
3        2345
1         382
4         254
3 4       141
2          97
1 3        45
1 2        17
2 3         7
1 2 3       1
Name: ClassIds, dtype: int64
1 - Train items:
3        2345
1         382
4         254
3 4       141
2          97
1 3        45
1 2        17
2 3         7
1 2 3       1
Name: ClassIds, dtype: int64
1 - Valid items:
3        2346
1         381
4         254
3 4       140
2          97
1 3        45
1 2        18
2 3         7
1 2 3       1
Name: ClassIds, dtype: int64
```