The fastai data block API cannot directly give us an (input, target) tuple for our deep learning model.
That's why we need to create custom Transforms to solve this problem while still leveraging the high-level framework.
# Locate the dataset directory and load the pivoted training dataframe.
path = Path("../data")
assert path.is_dir()
# only_faulty=True presumably keeps only images with at least one defect,
# and pivot=True one row per ImageId with one column per ClassId — confirm
# against get_train_df's definition.
train_pivot = get_train_df(path, only_faulty=True, pivot=True)
print("Train pivot shape: ", train_pivot.shape)
train_pivot.head(n=3)
# Pick one random row as a running example; `.name` is its index (the ImageId).
train_example = train_pivot.sample().iloc[0]
train_example.name
With the ReadImagePathFromIndex transform we can get the first piece of information about the training example: the image path of train_example from train_pivot.
In its setup we need to specify the prefix to be added to the ImageId of the training image.
# ReadImagePathFromIndex builds the full image path by prepending the given
# prefix to the example's index (its ImageId).
x_tfm = ReadImagePathFromIndex(pref=(path/"train_images"))
x = x_tfm(train_example)
# The transform must yield the same string as joining the path manually.
test_eq(x, str(path/"train_images"/train_example.name))
With the ReadRLEs transform we can get our labels: the list of RLEs (one per ClassId).
In its setup we need to specify the column names of the ClassIds.
We pass train_example as input and get a list of strings as output.
# Columns 1..4 of the pivot hold the RLE string for each of the four ClassIds.
cols = [1,2,3,4]
y_tfm = ReadRLEs(cols=cols)
rles = y_tfm(train_example)
test_eq(len(rles), 4)
# Missing defects are stored as NaN in the pivot; the transform maps them to
# an empty string. (`is not` instead of `not ... is` — PEP 8 / E714.)
test_eq(rles, [train_example[i] if train_example[i] is not np.nan else '' for i in cols])
The MakeMask transform takes a list of RLEs and returns a mask of shape (256, 1600) if flatten is True (the default).
If flatten is False it returns a (256, 1600, 4) array.
# flatten=False decodes the four RLE strings into a (256, 1600, 4) array,
# one channel per ClassId.
mask_tfm = MakeMask(flatten=False)
mask = mask_tfm(rles)
test_eq(mask.shape, (256,1600,4))
# Default transform with flatten mask for PILMask.create
flatten_mask_tfm = MakeMask()
flatten_mask = flatten_mask_tfm(rles)
test_eq(flatten_mask.shape, (256,1600))
# Visualise the flattened mask (presumably pixel value encodes the ClassId,
# 0 = background — confirm in MakeMask).
plt.figure(figsize=(15,5))
plt.xticks([])
plt.yticks([])
plt.imshow(flatten_mask);
# decode() must round-trip back to the original RLE strings in both modes.
rle = mask_tfm.decode(mask)
test_eq(rle, rles)
flatten_rle = flatten_mask_tfm.decode(flatten_mask)
test_eq(flatten_rle, rles)
Finally, a Datasets
object can be built from the two Pipeline
s created with the previous transforms.
# Compose the custom transforms into x/y Pipelines and build a fastai Datasets.
x_tfms = Pipeline([x_tfm, PILImage.create])
y_tfms = Pipeline([y_tfm, flatten_mask_tfm, PILMask.create])
dsets = Datasets(train_pivot, [x_tfms, y_tfms])
elem = dsets.train[1]
image, mask = elem
type(elem), image, mask
# Show the image, the mask, and the mask superimposed on the image.
_,axs = plt.subplots(1,3, figsize=(20, 5))
image.show(ctx=axs[0], title='image')
mask.show(alpha=1, ctx=axs[1], vmin=1, vmax=30, title='mask')
image.show(ctx=axs[2], title='superimposed')
mask.show(ctx=axs[2], vmin=1, vmax=30);
The ChannelMask transform changes the shape of the mask from a flattened (256, 1600) to (4, 256, 1600).
# Convert the (PILImage, PILMask) pair to tensors.
tens = ToTensor()
timg, tmask = tens(elem)
timg.shape, tmask.shape, tmask.dim()
# ChannelMask expands the flat mask into one channel per class.
tfm = ChannelMask()
ch_mask = tfm(tmask)
ch_mask.shape, ch_mask.dim()
# decodes() must invert the transform back to the flat mask.
decoded_mask = tfm.decodes(ch_mask)
decoded_mask.shape
test_close(decoded_mask, tmask)
show_images((decoded_mask,tmask), figsize=(15,5));
It works with batches:
# Fake a batch of 6 identical masks by broadcasting along a new batch dim.
bs_tmask = tmask.unsqueeze(0).expand(6, -1, -1)
tfm = ChannelMask()
bs_ch_mask = tfm(bs_tmask)
bs_ch_mask.shape, bs_ch_mask.dim()
decoded_bs_mask = tfm.decodes(bs_ch_mask)
decoded_bs_mask.shape
# Check that decode round-trips every element of the batch. The original loop
# bound an unused `tmp_mask` via enumerate and named the batch index `ch`
# (misleading: it indexes the batch, not a channel).
for b in range(len(bs_ch_mask)):
    test_close(decoded_bs_mask[b, ...], bs_tmask[b, ...])
# Back to numpy arrays for the albumentations examples below.
img, mask = elem
img, mask = np.array(img), np.array(mask)
img.shape, mask.shape
Some augmentations from the albumentations library:
import cv2
def show_aug(aug, img, mask):
    """Apply the albumentations augmentation `aug` to an image/mask pair,
    report and display the result, and return the augmented pair."""
    augmented = aug(image=img, mask=mask)
    new_img, new_mask = augmented["image"], augmented["mask"]
    # Shapes and mask values help verify the augmentation kept labels intact.
    print(new_img.shape, new_mask.shape)
    print(f"Unique elems in mask: {np.unique(new_mask)}")
    show_images((new_img, new_mask), figsize=(10,20))
    return new_img, new_mask
# Crop a 256x400 window guaranteed to contain defect pixels (class 0 ignored).
aug = alb.CropNonEmptyMaskIfExists(256, 400, p=1., ignore_values=[0])
aug_crop_img, aug_crop_mask = show_aug(aug, img, mask)
# Flips (p=1. so the effect is always visible in the demo).
aug = alb.VerticalFlip(p=1.)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
aug = alb.HorizontalFlip(p=1.)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
# Non-rigid deformations.
aug = alb.ElasticTransform(p=1., alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
aug = alb.GridDistortion(p=1.)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
aug = alb.OpticalDistortion(distort_limit=0.5, shift_limit=0.05, p=1., border_mode=cv2.BORDER_REPLICATE)
aug_img, aug_mask = show_aug(aug, aug_crop_img, aug_crop_mask)
Everything is wrapped up in get_train_aug and get_valid_aug for the training and validation augmentations.
Then AlbumentationsTransform is a mixed Transform for the DataBlock.
train_aug = get_train_aug(256, 400)
aug_img, aug_mask = show_aug(train_aug, aug_crop_img, aug_crop_mask)
valid_aug = get_valid_aug(256, 400)
aug_img, aug_mask = show_aug(valid_aug, aug_crop_img, aug_crop_mask)
alb_tfm = AlbumentationsTransform(train_aug, valid_aug)
alb_aug_elem = alb_tfm(elem, split_idx=0)
show_images(alb_aug_elem, figsize=(15, 10))
# Build DataLoaders from the project's DataBlock with a 15% validation split.
splitter = TrainTestSplitter(0.15)
block = SteelDataBlock(path, splitter)
# num_workers=0 keeps loading single-process (notebook/Windows friendly).
dls = block.dataloaders(
    source=train_pivot,
    bs=16,
    num_workers=0
)
# Sanity-check one batch.
xb, yb = dls.one_batch()
xb.shape, yb.shape
We use the scikit-learn StratifiedKFold class to create stratified folds for this dataset.
# Demonstrate stratified K-fold splitting on the defect-class combinations.
nsplits = 2
df = train_pivot.reset_index()
X = df["ImageId"].to_numpy()
# Stratify on ClassIds so each fold keeps the same class-combination mix.
y = df["ClassIds"].to_numpy()
X.shape, y.shape
skf = StratifiedKFold(n_splits=nsplits, shuffle=True)
# Placeholder for one dataset per fold. The original used `_` (the notebook's
# last-output value, undefined in a plain script) as the value — use None.
dsets = {i: None for i in range(nsplits)}
for i, (train_index, valid_index) in enumerate(skf.split(X, y)):
    print(
        f"{i}-fold:",
        f"Train: #{len(train_index)}, e.g. {train_index[:5]}",
        f"Valid: #{len(valid_index)}, e.g. {valid_index[:5]}",
        sep='\n', end='\n\n')
# The project helper wraps the same logic and returns the split indices.
splits = get_kfold_splits(train_pivot, nsplits=2)
splits
# SteelDataLoaders wraps the block into DataLoaders resized to 256x400.
dls = SteelDataLoaders(block, train_pivot, bs=16, size=(256, 400))
# Sanity-check one batch.
xb, yb = dls.one_batch()
xb.shape, yb.shape
# Build one set of DataLoaders per fold and inspect the class balance.
for idx, split in enumerate(splits):
    block = SteelDataBlock(path, splitter=KFoldSplitter(splits, idx))
    dls = SteelDataLoaders(block, train_pivot, bs=16, size=(256, 400))
    # Bug fix: the original printed `i`, a stale variable left over from the
    # earlier StratifiedKFold loop, instead of the current fold index `idx`.
    print(f"{idx} - Train items:\n{dls.train.items.ClassIds.value_counts()}")
    print(f"{idx} - Valid items:\n{dls.valid.items.ClassIds.value_counts()}")