diff --git a/README.md b/README.md index da2fc59..c1603ce 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ bash tools/dist_train.sh configs/soft_teacher/soft_teacher_faster_rcnn_r50_caffe ``` - To train model on **new dataset**: -The core idea is to convert a new dataset to coco format. Details about it can be found in the [adding new dataset](https://github.com/open-mmlab/mmdetection/blob/master/docs/tutorials/customize_dataset.md). +The core idea is to convert a new dataset to coco format. Details about it can be found in the [adding new dataset](https://github.com/open-mmlab/mmdetection/blob/master/docs/tutorials/customize_dataset.md). See also `tools/dataset/unlabeled_json.py`, which generates the annotation json for a folder of unlabeled images. diff --git a/requirements.txt b/requirements.txt index f3d6626..e388d91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ torchvision mmcv-full wandb prettytable +imagesize diff --git a/ssod/datasets/samplers/semi_sampler.py b/ssod/datasets/samplers/semi_sampler.py index 89b0b07..2ab2687 100644 --- a/ssod/datasets/samplers/semi_sampler.py +++ b/ssod/datasets/samplers/semi_sampler.py @@ -51,6 +51,7 @@ def __init__( self.size_of_dataset = [] cumulative_sizes = [0] + self.cumulative_sizes + data_names = ['supervised', 'unsupervised'] for i, _ in enumerate(self.group_sizes): size_of_dataset = 0 cur_group_inds = np.where(self.flag == i)[0] @@ -62,6 +63,9 @@ def __init__( ) )[0] size_per_dataset = len(cur_group_cur_dataset) + assert size_per_dataset != 0, ( + f'{data_names[j]} dataset does not contain examples from both' + ' h > w and w > h aspect ratio groups') size_of_dataset = max( size_of_dataset, np.ceil(size_per_dataset / self.sample_ratio[j]) ) diff --git a/tools/dataset/unlabeled_json.py b/tools/dataset/unlabeled_json.py new file mode 100644 index 0000000..9b3db19 --- /dev/null +++ b/tools/dataset/unlabeled_json.py @@ -0,0 +1,47 @@ +"""Generate unlabeled coco dataset json annotations from a folder of images.
+Uses imagesize for significant speedup over reading images into memory.
+
+Example:
+python tools/dataset/unlabeled_json.py --img-dir <image_dir> --json-out <out.json>
+"""
+
+import argparse
+import glob
+import imagesize
+import json
+import os
+
+
+def folder_to_json(img_dir, json_out_path):
+    """Write a COCO-style json listing the images found in ``img_dir``.
+
+    Each image gets an ``id``, ``file_name``, ``width`` and ``height`` entry;
+    ``categories`` is left empty. Raises ``FileNotFoundError`` when no
+    ``.jpg``/``.jpeg``/``.png`` file sits directly inside ``img_dir``.
+    """
+    ext = ('*.jpg', '*.jpeg', '*.png')
+    # os.path.join makes ``img_dir`` work with or without a trailing slash;
+    # sorting keeps the assigned image ids reproducible between runs.
+    paths = sorted(
+        p for e in ext for p in glob.glob(os.path.join(img_dir, e)))
+    if not paths:
+        raise FileNotFoundError(f'no images found in {img_dir!r}')
+
+    images = []
+    for i, p in enumerate(paths):
+        w, h = imagesize.get(p)
+        images.append(
+            dict(id=i, file_name=os.path.basename(p), width=w, height=h))
+
+    data = dict(categories=[], images=images)
+    with open(json_out_path, 'w') as f:
+        json.dump(data, f)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--img-dir", type=str, required=True)
+    parser.add_argument("--json-out", type=str, required=True)
+    args = parser.parse_args()
+
+    folder_to_json(args.img_dir, args.json_out)