Path: blob/master/clean/09-small-models-road-to-the-top-part-2.ipynb
807 views
Kernel: python3
In [ ]:
# install fastkaggle if not available try: import fastkaggle except ModuleNotFoundError: !pip install -Uq fastkaggle from fastkaggle import *
Going faster
In [ ]:
# Competition slug; reused further down for the submission and notebook push.
comp = 'paddy-disease-classification'

# setup_comp receives the pip requirement string via `install=`, so it must
# run before the fastai import below.
path = setup_comp(
    comp,
    install='fastai "timm>=0.6.2.dev0"',
)

from fastai.vision.all import *

# Fixed seed for the rest of the notebook.
set_seed(42)
In [ ]:
# Folder that will hold the downsized training images; `train` reads this global.
trn_path = Path('sml')
In [ ]:
# Write resized copies (max_size=256) of everything under train_images into
# `trn_path`, recursing into subfolders.
resize_images(path/'train_images', dest=trn_path, max_size=256, recurse=True)
In [ ]:
# Dataloaders over the small images with a 20% validation split
# (seed=42, the same split settings `train` uses), then a three-image preview.
dls = ImageDataLoaders.from_folder(
    trn_path,
    valid_pct=0.2,
    seed=42,
    item_tfms=Resize((256,192)),
)
dls.show_batch(max_n=3)
In [ ]:
def train(arch, item, batch, epochs=5, lr=0.01):
    """Build dataloaders from `trn_path`, fine-tune `arch`, and return the learner.

    Args:
        arch: Architecture handed to `vision_learner` (this notebook passes
            model names as strings).
        item: Forwarded unchanged as `item_tfms`.
        batch: Forwarded unchanged as `batch_tfms`.
        epochs: Number of epochs passed to `fine_tune`.
        lr: Learning rate passed to `fine_tune`. Defaults to 0.01, the value
            that used to be hard-coded, so existing calls behave the same.

    Returns:
        The learner after `fine_tune` has finished.

    Note:
        `trn_path` is read from module scope at call time, so rebinding it
        changes which image folder is used.
    """
    # seed and valid_pct are fixed so every experiment is scored on the same
    # validation split.
    dls = ImageDataLoaders.from_folder(
        trn_path, seed=42, valid_pct=0.2, item_tfms=item, batch_tfms=batch)
    learn = vision_learner(dls, arch, metrics=error_rate).to_fp16()
    learn.fine_tune(epochs, lr)
    return learn
In [ ]:
# Baseline run: resnet26d, items resized to 192, batches augmented down to 128.
learn = train(
    'resnet26d',
    item=Resize(192),
    batch=aug_transforms(size=128, min_scale=0.75),
)
A ConvNeXt model
In [ ]:
# Model name reused by the remaining `train` calls (a string, so presumably
# resolved through timm — confirm).
arch = 'convnext_small_in22k'
In [ ]:
# Same augmentation settings as the baseline, but with `arch` and the
# 'squish' resize method for the item transform.
learn = train(
    arch,
    item=Resize(192, method='squish'),
    batch=aug_transforms(size=128, min_scale=0.75),
)
Preprocessing experiments
In [ ]:
# Preprocessing experiment: Resize with its default method (no `method=` given).
learn = train(
    arch,
    item=Resize(192),
    batch=aug_transforms(size=128, min_scale=0.75),
)
In [ ]:
# Preview what zero-padded resizing produces before training with it.
dls = ImageDataLoaders.from_folder(
    trn_path,
    valid_pct=0.2,
    seed=42,
    item_tfms=Resize(192, method=ResizeMethod.Pad, pad_mode=PadMode.Zeros),
)
dls.show_batch(max_n=3)
In [ ]:
# Preprocessing experiment: zero-padded, non-square items (256,192) with
# batches augmented to (171,128).
learn = train(
    arch,
    item=Resize((256,192), method=ResizeMethod.Pad, pad_mode=PadMode.Zeros),
    batch=aug_transforms(size=(171,128), min_scale=0.75),
)
Test time augmentation
In [ ]:
# Keep a handle on the validation dataloader; the TTA cell reuses it.
valid = learn.dls.valid

# Predictions and targets for the validation set.
preds,targs = learn.get_preds(dl=valid)
In [ ]:
# Validation error rate of the plain (non-TTA) predictions.
error_rate(preds, targs)
In [ ]:
# Display six samples from the training dataloader. NOTE(review): in fastai,
# `unique=True` is documented to repeat one item so the random augmentations
# can be compared — confirm against the docs.
learn.dls.train.show_batch(max_n=6, unique=True)
In [ ]:
# Test-time-augmented predictions on the validation dataloader; the second
# return value is discarded.
tta_preds,_ = learn.tta(dl=valid)
In [ ]:
# Error rate of the TTA predictions against the same targets, for comparison
# with the plain-prediction error rate.
error_rate(tta_preds, targs)
Scaling up
In [ ]:
# Rebind the global that `train` reads so later runs use the images under
# train_images in the competition folder instead of the 'sml' copies.
trn_path = path/'train_images'
In [ ]:
# Scaled-up run: 12 epochs, larger zero-padded items (480, 360) and batches
# augmented to (256,192).
learn = train(
    arch,
    epochs=12,
    item=Resize((480, 360), method=ResizeMethod.Pad, pad_mode=PadMode.Zeros),
    batch=aug_transforms(size=(256,192), min_scale=0.75),
)
In [ ]:
# TTA on the new learner's own validation dataloader. The targets are taken
# from this call because the learner was rebuilt.
tta_preds,targs = learn.tta(dl=learn.dls.valid)

# Last expression of the cell, so the notebook displays the error rate.
error_rate(tta_preds, targs)
Submission
In [ ]:
# Sort the test files so the prediction order is deterministic.
tst_files = get_image_files(path/'test_images').sorted()

# Test dataloader built from the trained learner's dataloaders.
tst_dl = learn.dls.test_dl(tst_files)
In [ ]:
# TTA predictions for the test dataloader; the second return value is ignored.
preds,_ = learn.tta(dl=tst_dl)
In [ ]:
# Position of the highest score along dim 1 for each prediction row.
idxs = preds.argmax(dim=1)
In [ ]:
# Array form of the vocab so it can be indexed by all of `idxs` at once.
vocab = np.array(learn.dls.vocab)

# Predicted label per test image, in `idxs` order.
results = pd.Series(vocab[idxs], name="idxs")
In [ ]:
ss = pd.read_csv(path/'sample_submission.csv') ss['label'] = results ss.to_csv('subm.csv', index=False) !head subm.csv
In [ ]:
# Submit through the Kaggle API only when `iskaggle` is falsy; the import is
# kept inside the guard so it is never attempted otherwise.
if not iskaggle:
    from kaggle import api
    api.competition_submit_cli(
        'subm.csv',
        'convnext small 256x192 12 epochs tta',
        comp,
    )
In [ ]:
# This is what I use to push my notebook from my home PC to Kaggle.
# Guarded by `iskaggle` so the push is skipped whenever that flag is truthy.
if not iskaggle:
    push_notebook(
        'jhoward', 'small-models-road-to-the-top-part-2',
        title='Small models: Road to the Top, Part 2',
        file='small-models-road-to-the-top-part-2.ipynb',
        competition=comp, private=True, gpu=True)