diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7756454 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.idea/ +build/ +data/ +__pycache__ +output diff --git a/README.md b/README.md index 55d7d1f..35e84a4 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # High-level Semantic Feature Detection: A New Perspective for Pedestrian Detection Keras implementation of [CSP] accepted by CVPR 2019. ## Introduction -This paper provides a new perspective for detecting pedestrians where detection is formulated as Center and Scale Prediction (CSP), the pipeline is illustrated in the following. For more details, please refer to our [paper](./docs/2019CVPR-CSP.pdf). +This paper provides a new perspective for detecting pedestrians where detection is formulated as Center and Scale Prediction (CSP), the pipeline is illustrated in the following. For more details, please refer to our [paper](http://openaccess.thecvf.com/content_CVPR_2019/papers/Liu_High-Level_Semantic_Feature_Detection_A_New_Perspective_for_Pedestrian_Detection_CVPR_2019_paper.pdf). ![img01](./docs/pipeline.png) Besides the superority on pedestrian detection demonstrated in the paper, we take a step further towards the generablity of CSP and validate it on face detection. Experimental reults on WiderFace benchmark also show the competitiveness of CSP. @@ -10,10 +10,10 @@ Besides the superority on pedestrian detection demonstrated in the paper, we tak ### Dependencies -* Python 2.7 -* Tensorflow 1.4.1 -* Keras 2.0.6 -* OpenCV 3.4.1.15 +* Python >= 3.6 +* Tensorflow >= 1.1.3 +* Keras >= 2.0.6 +* OpenCV >= 3.4.1.15 (note that other versions than 3.4.1.15 will result in different performance on Caltech) ## Contents 1. [Installation](#installation) @@ -33,6 +33,17 @@ Besides the superority on pedestrian detection demonstrated in the paper, we tak pip install -r requirements.txt ``` +3. Build dependencies +``` + python setup.py build_ext --inplace +``` + +4. Download pretrained resnet50 weights (basenet only): +``` +./download_weights.sh +``` + + ### Preparation 1. Download the dataset. diff --git a/download_weights.sh b/download_weights.sh new file mode 100755 index 0000000..8f263fa --- /dev/null +++ b/download_weights.sh @@ -0,0 +1,2 @@ +mkdir -p data/models +wget -O ./data/models/resnet50_weights_tf_dim_ordering_tf_kernels.h5 https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5 --no-check-certificate diff --git a/eval_city/cocoapi/PythonAPI/pycocotools/coco.py b/eval_city/cocoapi/PythonAPI/pycocotools/coco.py index dc9972b..7b8b37e 100755 --- a/eval_city/cocoapi/PythonAPI/pycocotools/coco.py +++ b/eval_city/cocoapi/PythonAPI/pycocotools/coco.py @@ -58,7 +58,7 @@ import sys PYTHON_VERSION = sys.version_info[0] if PYTHON_VERSION == 2: - from urllib import urlretrieve + from urllib.request import urlretrieve elif PYTHON_VERSION == 3: from urllib.request import urlretrieve @@ -83,7 +83,7 @@ def __init__(self, annotation_file=None): tic = time.time() dataset = json.load(open(annotation_file, 'r')) assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) - print('Done (t={:0.2f}s)'.format(time.time()- tic)) + print(('Done (t={:0.2f}s)'.format(time.time()- tic))) self.dataset = dataset self.createIndex() @@ -123,8 +123,8 @@ def info(self): Print information about the annotation file. :return: """ - for key, value in self.dataset['info'].items(): - print('{}: {}'.format(key, value)) + for key, value in list(self.dataset['info'].items()): + print(('{}: {}'.format(key, value))) def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): """ @@ -187,7 +187,7 @@ def getImgIds(self, imgIds=[], catIds=[]): catIds = catIds if _isArrayLike(catIds) else [catIds] if len(imgIds) == len(catIds) == 0: - ids = self.imgs.keys() + ids = list(self.imgs.keys()) else: ids = set(imgIds) for i, catId in enumerate(catIds): @@ -292,7 +292,7 @@ def showAnns(self, anns): ax.add_collection(p) elif datasetType == 'captions': for ann in anns: - print(ann['caption']) + print((ann['caption'])) def loadRes(self, resFile): """ @@ -305,7 +305,7 @@ def loadRes(self, resFile): print('Loading and preparing results...') tic = time.time() - if type(resFile) == str or type(resFile) == unicode: + if type(resFile) == str or type(resFile) == str: anns = json.load(open(resFile)) elif type(resFile) == np.ndarray: anns = self.loadNumpyAnnotations(resFile) @@ -349,7 +349,7 @@ def loadRes(self, resFile): ann['area'] = (x1-x0)*(y1-y0) ann['id'] = id + 1 ann['bbox'] = [x0,y0,x1-x0,y1-y0] - print('DONE (t={:0.2f}s)'.format(time.time()- tic)) + print(('DONE (t={:0.2f}s)'.format(time.time()- tic))) res.dataset['annotations'] = anns res.createIndex() @@ -366,7 +366,7 @@ def download(self, tarDir = None, imgIds = [] ): print('Please specify target directory') return -1 if len(imgIds) == 0: - imgs = self.imgs.values() + imgs = list(self.imgs.values()) else: imgs = self.loadImgs(imgIds) N = len(imgs) @@ -377,7 +377,7 @@ def download(self, tarDir = None, imgIds = [] ): fname = os.path.join(tarDir, img['file_name']) if not os.path.exists(fname): urlretrieve(img['coco_url'], fname) - print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) + print(('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic))) def loadNumpyAnnotations(self, data): """ @@ -387,13 +387,13 @@ def loadNumpyAnnotations(self, data): """ print('Converting ndarray to lists...') assert(type(data) == np.ndarray) - print(data.shape) + print((data.shape)) assert(data.shape[1] == 7) N = data.shape[0] ann = [] for i in range(N): if i % 1000000 == 0: - print('{}/{}'.format(i,N)) + print(('{}/{}'.format(i,N))) ann += [{ 'image_id' : int(data[i, 0]), 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], diff --git a/eval_city/cocoapi/PythonAPI/pycocotools/cocoeval.py b/eval_city/cocoapi/PythonAPI/pycocotools/cocoeval.py index 7a4b4ad..84ab931 100755 --- a/eval_city/cocoapi/PythonAPI/pycocotools/cocoeval.py +++ b/eval_city/cocoapi/PythonAPI/pycocotools/cocoeval.py @@ -130,8 +130,8 @@ def evaluate(self): # add backward compatibility if useSegm is specified in params if not p.useSegm is None: p.iouType = 'segm' if p.useSegm == 1 else 'bbox' - print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) - print('Evaluate annotation type *{}*'.format(p.iouType)) + print(('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))) + print(('Evaluate annotation type *{}*'.format(p.iouType))) p.imgIds = list(np.unique(p.imgIds)) if p.useCats: p.catIds = list(np.unique(p.catIds)) @@ -159,7 +159,7 @@ def evaluate(self): ] self._paramsEval = copy.deepcopy(self.params) toc = time.time() - print('DONE (t={:0.2f}s).'.format(toc-tic)) + print(('DONE (t={:0.2f}s).'.format(toc-tic))) def computeIoU(self, imgId, catId): p = self.params @@ -346,7 +346,7 @@ def accumulate(self, p = None): # get inds to evaluate k_list = [n for n, k in enumerate(p.catIds) if k in setK] m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + a_list = [n for n, a in enumerate([tuple(x) for x in p.areaRng]) if a in setA] i_list = [n for n, i in enumerate(p.imgIds) if i in setI] I0 = len(_pe.imgIds) A0 = len(_pe.areaRng) @@ -418,7 +418,7 @@ def accumulate(self, p = None): 'scores': scores, } toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) + print(('DONE (t={:0.2f}s).'.format( toc-tic))) def summarize(self): ''' @@ -454,7 +454,7 @@ def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): mean_s = -1 else: mean_s = np.mean(s[s>-1]) - print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) + print((iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))) return mean_s def _summarizeDets(): stats = np.zeros((12,)) diff --git a/eval_city/eval_script/coco.py b/eval_city/eval_script/coco.py index f870138..576371a 100755 --- a/eval_city/eval_script/coco.py +++ b/eval_city/eval_script/coco.py @@ -58,7 +58,7 @@ import sys PYTHON_VERSION = sys.version_info[0] if PYTHON_VERSION == 2: - from urllib import urlretrieve + from urllib.request import urlretrieve elif PYTHON_VERSION == 3: from urllib.request import urlretrieve @@ -118,8 +118,8 @@ def info(self): Print information about the annotation file. :return: """ - for key, value in self.dataset['info'].items(): - print('{}: {}'.format(key, value)) + for key, value in list(self.dataset['info'].items()): + print(('{}: {}'.format(key, value))) def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): """ @@ -182,7 +182,7 @@ def getImgIds(self, imgIds=[], catIds=[]): catIds = catIds if type(catIds) == list else [catIds] if len(imgIds) == len(catIds) == 0: - ids = self.imgs.keys() + ids = list(self.imgs.keys()) else: ids = set(imgIds) for i, catId in enumerate(catIds): @@ -287,7 +287,7 @@ def showAnns(self, anns): ax.add_collection(p) elif datasetType == 'captions': for ann in anns: - print(ann['caption']) + print((ann['caption'])) def loadRes(self, resFile): """ @@ -300,7 +300,7 @@ def loadRes(self, resFile): # print('Loading and preparing results...') tic = time.time() - if type(resFile) == str or type(resFile) == unicode: + if type(resFile) == str or type(resFile) == str: anns = json.load(open(resFile)) elif type(resFile) == np.ndarray: anns = self.loadNumpyAnnotations(resFile) @@ -364,7 +364,7 @@ def download(self, tarDir = None, imgIds = [] ): print('Please specify target directory') return -1 if len(imgIds) == 0: - imgs = self.imgs.values() + imgs = list(self.imgs.values()) else: imgs = self.loadImgs(imgIds) N = len(imgs) @@ -375,7 +375,7 @@ def download(self, tarDir = None, imgIds = [] ): fname = os.path.join(tarDir, img['file_name']) if not os.path.exists(fname): urlretrieve(img['coco_url'], fname) - print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) + print(('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic))) def loadNumpyAnnotations(self, data): """ @@ -385,13 +385,13 @@ def loadNumpyAnnotations(self, data): """ print('Converting ndarray to lists...') assert(type(data) == np.ndarray) - print(data.shape) + print((data.shape)) assert(data.shape[1] == 7) N = data.shape[0] ann = [] for i in range(N): if i % 1000000 == 0: - print('{}/{}'.format(i,N)) + print(('{}/{}'.format(i,N))) ann += [{ 'image_id' : int(data[i, 0]), 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], diff --git a/eval_city/eval_script/coco.pyc b/eval_city/eval_script/coco.pyc deleted file mode 100755 index 1f3d2b2..0000000 Binary files a/eval_city/eval_script/coco.pyc and /dev/null differ diff --git a/eval_city/eval_script/eval_MR_multisetup.py b/eval_city/eval_script/eval_MR_multisetup.py index 1173b71..e16fc3d 100755 --- a/eval_city/eval_script/eval_MR_multisetup.py +++ b/eval_city/eval_script/eval_MR_multisetup.py @@ -122,7 +122,7 @@ def evaluate(self, id_setup): # add backward compatibility if useSegm is specified in params if not p.useSegm is None: p.iouType = 'segm' if p.useSegm == 1 else 'bbox' - print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) + print(('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))) # print('Evaluate annotation type *{}*'.format(p.iouType)) p.imgIds = list(np.unique(p.imgIds)) if p.useCats: @@ -446,7 +446,7 @@ def _summarize(iouThr=None, maxDets=100 ): mean_s = np.log(mrs[mrs<2]) mean_s = np.mean(mean_s) mean_s = np.exp(mean_s) - print(iStr.format(titleStr, typeStr,setupStr, iouStr, heightStr, occlStr, mean_s*100)) + print((iStr.format(titleStr, typeStr,setupStr, iouStr, heightStr, occlStr, mean_s*100))) # res_file.write(iStr.format(titleStr, typeStr,setupStr, iouStr, heightStr, occlStr, mean_s*100)) res_file.write(str(mean_s * 100)) res_file.write('\n') diff --git a/eval_city/eval_script/eval_MR_multisetup.pyc b/eval_city/eval_script/eval_MR_multisetup.pyc deleted file mode 100644 index a0c9e6b..0000000 Binary files a/eval_city/eval_script/eval_MR_multisetup.pyc and /dev/null differ diff --git a/eval_city/eval_script/eval_demo.py b/eval_city/eval_script/eval_demo.py index b32ae63..7d7c6d5 100755 --- a/eval_city/eval_script/eval_demo.py +++ b/eval_city/eval_script/eval_demo.py @@ -8,7 +8,7 @@ annFile = '../val_gt.json' main_path = '../../output/valresults/city/h/off' for f in sorted(os.listdir(main_path)): - print f + print(f) # initialize COCO detections api dt_path = os.path.join(main_path, f) resFile = os.path.join(dt_path,'val_dt.json') diff --git a/generate_cache_caltech.py b/generate_cache_caltech.py index cad6ec8..c1923d2 100755 --- a/generate_cache_caltech.py +++ b/generate_cache_caltech.py @@ -1,5 +1,5 @@ import os -import cPickle +import pickle import numpy as np import matplotlib.pyplot as plt @@ -18,46 +18,47 @@ box_count = 0 files = sorted(os.listdir(all_anno_path)) for l in range(len(files)): - gtname = files[l] - imgname = files[l].split('.')[0]+'.jpg' - img_path = os.path.join(all_img_path, imgname) - gt_path = os.path.join(all_anno_path, gtname) + gtname = files[l] + imgname = files[l].split('.')[0] + '.jpg' + img_path = os.path.join(all_img_path, imgname) + gt_path = os.path.join(all_anno_path, gtname) - boxes = [] - ig_boxes = [] - with open(gt_path, 'rb') as fid: - lines = fid.readlines() - if len(lines)>1: - for i in range(1, len(lines)): - info = lines[i].strip().split(' ') - label = info[0] - occ, ignore = info[5], info[10] - x1, y1 = max(int(float(info[1])), 0), max(int(float(info[2])), 0) - w, h = min(int(float(info[3])), cols - x1 - 1), min(int(float(info[4])), rows - y1 - 1) - box = np.array([int(x1), int(y1), int(x1) + int(w), int(y1) + int(h)]) - if int(ignore) == 0: - boxes.append(box) - else: - ig_boxes.append(box) - boxes = np.array(boxes) - ig_boxes = np.array(ig_boxes) + boxes = [] + ig_boxes = [] + with open(gt_path, 'rb') as fid: + lines = fid.readlines() + if len(lines) > 1: + for i in range(1, len(lines)): + info = lines[i].strip().split(' ') + label = info[0] + occ, ignore = info[5], info[10] + x1, y1 = max(int(float(info[1])), 0), max(int(float(info[2])), 0) + w, h = min(int(float(info[3])), cols - x1 - 1), min(int(float(info[4])), rows - y1 - 1) + box = np.array([int(x1), int(y1), int(x1) + int(w), int(y1) + int(h)]) + if int(ignore) == 0: + boxes.append(box) + else: + ig_boxes.append(box) + boxes = np.array(boxes) + ig_boxes = np.array(ig_boxes) - annotation = {} - annotation['filepath'] = img_path - box_count += len(boxes) - iggt_count += len(ig_boxes) - annotation['bboxes'] = boxes - annotation['ignoreareas'] = ig_boxes - if len(boxes) == 0: - image_data_nogt.append(annotation) - else: - image_data_gt.append(annotation) - valid_count += 1 -print '{} images and {} valid images, {} valid gt and {} ignored gt'.format(len(files), valid_count, box_count, iggt_count) + annotation = {} + annotation['filepath'] = img_path + box_count += len(boxes) + iggt_count += len(ig_boxes) + annotation['bboxes'] = boxes + annotation['ignoreareas'] = ig_boxes + if len(boxes) == 0: + image_data_nogt.append(annotation) + else: + image_data_gt.append(annotation) + valid_count += 1 +print('{} images and {} valid images, {} valid gt and {} ignored gt'.format(len(files), valid_count, box_count, + iggt_count)) if not os.path.exists(res_path_gt): - with open(res_path_gt, 'wb') as fid: - cPickle.dump(image_data_gt, fid, cPickle.HIGHEST_PROTOCOL) + with open(res_path_gt, 'wb') as fid: + pickle.dump(image_data_gt, fid, pickle.HIGHEST_PROTOCOL) if not os.path.exists(res_path_nogt): - with open(res_path_nogt, 'wb') as fid: - cPickle.dump(image_data_nogt, fid, cPickle.HIGHEST_PROTOCOL) \ No newline at end of file + with open(res_path_nogt, 'wb') as fid: + pickle.dump(image_data_nogt, fid, pickle.HIGHEST_PROTOCOL) diff --git a/generate_cache_city.py b/generate_cache_city.py index 8172b6e..00c230e 100755 --- a/generate_cache_city.py +++ b/generate_cache_city.py @@ -1,7 +1,6 @@ -from __future__ import division import os import cv2 -import cPickle +import pickle import numpy as np from scipy import io as scio import time @@ -15,54 +14,56 @@ rows, cols = 1024, 2048 for type in types: - anno_path = os.path.join(all_anno_path, 'anno_'+type+'.mat') - res_path = os.path.join('data/cache/cityperson', type) - image_data = [] - annos = scio.loadmat(anno_path) - index = 'anno_'+type+'_aligned' - valid_count = 0 - iggt_count = 0 - box_count = 0 - for l in range(len(annos[index][0])): - anno = annos[index][0][l] - cityname = anno[0][0][0][0].encode() - imgname = anno[0][0][1][0].encode() - gts = anno[0][0][2] - img_path = os.path.join(all_img_path, type + '/'+ cityname+'/'+imgname) - boxes = [] - ig_boxes = [] - vis_boxes = [] - for i in range(len(gts)): - label, x1, y1, w, h = gts[i, :5] - x1, y1 = max(int(x1), 0), max(int(y1), 0) - w, h = min(int(w), cols - x1 -1), min(int(h), rows - y1 -1) - xv1, yv1, wv, hv = gts[i, 6:] - xv1, yv1 = max(int(xv1), 0), max(int(yv1), 0) - wv, hv = min(int(wv), cols - xv1 - 1), min(int(hv), rows - yv1 - 1) + anno_path = os.path.join(all_anno_path, 'anno_' + type + '.mat') + res_path = os.path.join('data/cache/cityperson', type) + image_data = [] + annos = scio.loadmat(anno_path) + index = 'anno_' + type + '_aligned' + valid_count = 0 + iggt_count = 0 + box_count = 0 + for l in range(len(annos[index][0])): + anno = annos[index][0][l] + cityname = anno[0][0][0][0].encode() + imgname = anno[0][0][1][0].encode() + gts = anno[0][0][2] + img_path = os.path.join(all_img_path, type + '/' + cityname + '/' + imgname) + boxes = [] + ig_boxes = [] + vis_boxes = [] + for i in range(len(gts)): + label, x1, y1, w, h = gts[i, :5] + x1, y1 = max(int(x1), 0), max(int(y1), 0) + w, h = min(int(w), cols - x1 - 1), min(int(h), rows - y1 - 1) + xv1, yv1, wv, hv = gts[i, 6:] + xv1, yv1 = max(int(xv1), 0), max(int(yv1), 0) + wv, hv = min(int(wv), cols - xv1 - 1), min(int(hv), rows - yv1 - 1) - if label == 1 and h>=50: - box = np.array([int(x1), int(y1), int(x1)+int(w), int(y1)+int(h)]) - boxes.append(box) - vis_box = np.array([int(xv1), int(yv1), int(xv1)+int(wv), int(yv1)+int(hv)]) - vis_boxes.append(vis_box) - else: - ig_box = np.array([int(x1), int(y1), int(x1)+int(w), int(y1)+int(h)]) - ig_boxes.append(ig_box) - boxes = np.array(boxes) - vis_boxes = np.array(vis_boxes) - ig_boxes = np.array(ig_boxes) + if label == 1 and h >= 50: + box = np.array([int(x1), int(y1), int(x1) + int(w), int(y1) + int(h)]) + boxes.append(box) + vis_box = np.array([int(xv1), int(yv1), int(xv1) + int(wv), int(yv1) + int(hv)]) + vis_boxes.append(vis_box) + else: + ig_box = np.array([int(x1), int(y1), int(x1) + int(w), int(y1) + int(h)]) + ig_boxes.append(ig_box) + boxes = np.array(boxes) + vis_boxes = np.array(vis_boxes) + ig_boxes = np.array(ig_boxes) - if len(boxes)>0: - valid_count += 1 - annotation = {} - annotation['filepath'] = img_path - box_count += len(boxes) - iggt_count += len(ig_boxes) - annotation['bboxes'] = boxes - annotation['vis_bboxes'] = vis_boxes - annotation['ignoreareas'] = ig_boxes - image_data.append(annotation) - if not os.path.exists(res_path): - with open(res_path, 'wb') as fid: - cPickle.dump(image_data, fid, cPickle.HIGHEST_PROTOCOL) - print '{} has {} images and {} valid images, {} valid gt and {} ignored gt'.format(type, len(annos[index][0]), valid_count, box_count, iggt_count) + if len(boxes) > 0: + valid_count += 1 + annotation = {} + annotation['filepath'] = img_path + box_count += len(boxes) + iggt_count += len(ig_boxes) + annotation['bboxes'] = boxes + annotation['vis_bboxes'] = vis_boxes + annotation['ignoreareas'] = ig_boxes + image_data.append(annotation) + if not os.path.exists(res_path): + with open(res_path, 'wb') as fid: + pickle.dump(image_data, fid, pickle.HIGHEST_PROTOCOL) + print('{} has {} images and {} valid images, {} valid gt and {} ignored gt'.format(type, len(annos[index][0]), + valid_count, box_count, + iggt_count)) diff --git a/generate_cache_wider.py b/generate_cache_wider.py index cc37f8b..15145c0 100755 --- a/generate_cache_wider.py +++ b/generate_cache_wider.py @@ -1,12 +1,12 @@ import os import cv2 -import cPickle +import pickle import numpy as np import matplotlib.pyplot as plt root_dir = 'data/WiderFace/' img_path = os.path.join(root_dir, 'WIDER_train/images') -anno_path = os.path.join(root_dir, 'wider_face_split','wider_face_train_bbx_gt.txt') +anno_path = os.path.join(root_dir, 'wider_face_split', 'wider_face_train_bbx_gt.txt') # anno_path = os.path.join(root_dir, 'wider_face_split','wider_face_test_filelist.txt') res_path = 'data/cache/train' @@ -16,38 +16,38 @@ img_count = 0 box_count = 0 with open(anno_path, 'rb') as fid: - lines = fid.readlines() + lines = fid.readlines() num_lines = len(lines) index = 0 -while index0: - for i in range(num_obj): - info = lines[index+2+i].strip().split(' ') - x1, y1 = max(int(info[0]), 0), max(int(info[1]), 0) - w, h = min(int(info[2]), img_width - x1 - 1), min(int(info[3]), img_height - y1 - 1) - if w>=5 and h>=5: - box = np.array([x1, y1, x1+w, y1+h]) - boxes.append(box) - boxes = np.array(boxes) - box_count += len(boxes) - if len(boxes)>0: - valid_count += 1 - annotation = {} - annotation['filepath'] = filepath - annotation['bboxes'] = boxes - image_data.append(annotation) - index += (2+num_obj) +while index < num_lines: + filename = lines[index].strip() + img_count += 1 + if img_count % 1000 == 0: + print(img_count) + num_obj = int(lines[index + 1]) + filepath = os.path.join(img_path, filename) + img = cv2.imread(filepath) + img_height, img_width = img.shape[:2] + boxes = [] + if num_obj > 0: + for i in range(num_obj): + info = lines[index + 2 + i].strip().split(' ') + x1, y1 = max(int(info[0]), 0), max(int(info[1]), 0) + w, h = min(int(info[2]), img_width - x1 - 1), min(int(info[3]), img_height - y1 - 1) + if w >= 5 and h >= 5: + box = np.array([x1, y1, x1 + w, y1 + h]) + boxes.append(box) + boxes = np.array(boxes) + box_count += len(boxes) + if len(boxes) > 0: + valid_count += 1 + annotation = {} + annotation['filepath'] = filepath + annotation['bboxes'] = boxes + image_data.append(annotation) + index += (2 + num_obj) -print '{} images and {} valid images and {} boxes'.format(img_count, valid_count,box_count) +print('{} images and {} valid images and {} boxes'.format(img_count, valid_count, box_count)) with open(res_path, 'wb') as fid: - cPickle.dump(image_data, fid, cPickle.HIGHEST_PROTOCOL) \ No newline at end of file + pickle.dump(image_data, fid, pickle.HIGHEST_PROTOCOL) diff --git a/keras_csp/__init__.pyc b/keras_csp/__init__.pyc deleted file mode 100644 index 676371a..0000000 Binary files a/keras_csp/__init__.pyc and /dev/null differ diff --git a/keras_csp/bbox_process.py b/keras_csp/bbox_process.py index 97a8107..d2f0553 100644 --- a/keras_csp/bbox_process.py +++ b/keras_csp/bbox_process.py @@ -1,18 +1,18 @@ -from __future__ import division import numpy as np -from nms_wrapper import nms +from keras_csp.nms_wrapper import nms -def parse_det(Y, C, score=0.1, down=4,scale='h'): + +def parse_det(Y, C, score=0.1, down=4, scale='h'): seman = Y[0][0, :, :, 0] - if scale=='h': - height = np.exp(Y[1][0, :, :, 0])*down - width = 0.41*height - elif scale=='w': - width = np.exp(Y[1][0, :, :, 0])*down - height = width/0.41 - elif scale=='hw': - height = np.exp(Y[1][0, :, :, 0])*down - width = np.exp(Y[1][0, :, :, 1])*down + if scale == 'h': + height = np.exp(Y[1][0, :, :, 0]) * down + width = 0.41 * height + elif scale == 'w': + width = np.exp(Y[1][0, :, :, 0]) * down + height = width / 0.41 + elif scale == 'hw': + height = np.exp(Y[1][0, :, :, 0]) * down + width = np.exp(Y[1][0, :, :, 1]) * down y_c, x_c = np.where(seman > score) boxs = [] if len(y_c) > 0: @@ -20,13 +20,14 @@ def parse_det(Y, C, score=0.1, down=4,scale='h'): h = height[y_c[i], x_c[i]] w = width[y_c[i], x_c[i]] s = seman[y_c[i], x_c[i]] - x1, y1 = max(0, (x_c[i]+0.5) * down - w / 2), max(0, (y_c[i]+0.5) * down - h / 2) + x1, y1 = max(0, (x_c[i] + 0.5) * down - w / 2), max(0, (y_c[i] + 0.5) * down - h / 2) boxs.append([x1, y1, min(x1 + w, C.size_test[1]), min(y1 + h, C.size_test[0]), s]) boxs = np.asarray(boxs, dtype=np.float32) keep = nms(boxs, 0.5, usegpu=False, gpu_id=0) boxs = boxs[keep, :] return boxs + def parse_det_top(Y, C, score=0.1): seman = Y[0][0, :, :, 0] height = Y[1][0, :, :, 0] @@ -44,6 +45,7 @@ def parse_det_top(Y, C, score=0.1): boxs = boxs[keep, :] return boxs + def parse_det_bottom(Y, C, score=0.1): seman = Y[0][0, :, :, 0] height = Y[1][0, :, :, 0] @@ -54,14 +56,15 @@ def parse_det_bottom(Y, C, score=0.1): h = np.exp(height[y_c[i], x_c[i]]) * 4 w = 0.41 * h s = seman[y_c[i], x_c[i]] - x1, y1 = max(0, x_c[i] * 4 + 2 - w / 2), max(0, y_c[i] * 4 + 2-h) + x1, y1 = max(0, x_c[i] * 4 + 2 - w / 2), max(0, y_c[i] * 4 + 2 - h) boxs.append([x1, y1, min(x1 + w, C.size_test[1]), min(y1 + h, C.size_test[0]), s]) boxs = np.asarray(boxs, dtype=np.float32) keep = nms(boxs, 0.5, usegpu=False, gpu_id=0) boxs = boxs[keep, :] return boxs -def parse_det_offset(Y, C, score=0.1,down=4): + +def parse_det_offset(Y, C, score=0.1, down=4): seman = Y[0][0, :, :, 0] height = Y[1][0, :, :, 0] offset_y = Y[2][0, :, :, 0] @@ -71,7 +74,7 @@ def parse_det_offset(Y, C, score=0.1,down=4): if len(y_c) > 0: for i in range(len(y_c)): h = np.exp(height[y_c[i], x_c[i]]) * down - w = 0.41*h + w = 0.41 * h o_y = offset_y[y_c[i], x_c[i]] o_x = offset_x[y_c[i], x_c[i]] s = seman[y_c[i], x_c[i]] @@ -82,7 +85,8 @@ def parse_det_offset(Y, C, score=0.1,down=4): boxs = boxs[keep, :] return boxs -def parse_wider_offset(Y, C, score=0.1,down=4,nmsthre=0.5): + +def parse_wider_offset(Y, C, score=0.1, down=4, nmsthre=0.5): seman = Y[0][0, :, :, 0] height = Y[1][0, :, :, 0] width = Y[1][0, :, :, 1] @@ -101,12 +105,13 @@ def parse_wider_offset(Y, C, score=0.1,down=4,nmsthre=0.5): x1, y1 = min(x1, C.size_test[1]), min(y1, C.size_test[0]) boxs.append([x1, y1, min(x1 + w, C.size_test[1]), min(y1 + h, C.size_test[0]), s]) boxs = np.asarray(boxs, dtype=np.float32) - #keep = nms(boxs, nmsthre, usegpu=False, gpu_id=0) - #boxs = boxs[keep, :] - boxs = soft_bbox_vote(boxs,thre=nmsthre) + # keep = nms(boxs, nmsthre, usegpu=False, gpu_id=0) + # boxs = boxs[keep, :] + boxs = soft_bbox_vote(boxs, thre=nmsthre) return boxs -def soft_bbox_vote(det,thre=0.35,score=0.05): + +def soft_bbox_vote(det, thre=0.35, score=0.05): if det.shape[0] <= 1: return det order = det[:, 4].ravel().argsort()[::-1] @@ -160,7 +165,8 @@ def soft_bbox_vote(det,thre=0.35,score=0.05): dets = dets[order, :] return dets -def bbox_vote(det,thre): + +def bbox_vote(det, thre): if det.shape[0] <= 1: return det order = det[:, 4].ravel().argsort()[::-1] diff --git a/keras_csp/bbox_process.pyc b/keras_csp/bbox_process.pyc deleted file mode 100644 index cf0149b..0000000 Binary files a/keras_csp/bbox_process.pyc and /dev/null differ diff --git a/keras_csp/bbox_transform.py b/keras_csp/bbox_transform.py index b7992ea..fbc8796 100644 --- a/keras_csp/bbox_transform.py +++ b/keras_csp/bbox_transform.py @@ -7,6 +7,7 @@ import numpy as np + def bbox_transform(ex_rois, gt_rois): ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 @@ -27,6 +28,7 @@ def bbox_transform(ex_rois, gt_rois): (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() return targets + def bbox_transform_inv(boxes, deltas): if boxes.shape[0] == 0: return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) @@ -60,7 +62,8 @@ def bbox_transform_inv(boxes, deltas): return pred_boxes -def compute_targets(ex_rois, gt_rois, classifier_regr_std,std): + +def compute_targets(ex_rois, gt_rois, classifier_regr_std, std): """Compute bounding-box regression targets for an image.""" assert ex_rois.shape[0] == gt_rois.shape[0] @@ -68,11 +71,12 @@ def compute_targets(ex_rois, gt_rois, classifier_regr_std,std): assert gt_rois.shape[1] == 4 targets = bbox_transform(ex_rois, gt_rois) - # Optionally normalize targets by a precomputed mean and stdev + # Optionally normalize targets by a precomputed mean and stdev if std: - targets = targets/np.array(classifier_regr_std) + targets = targets / np.array(classifier_regr_std) return targets + def clip_boxes(boxes, im_shape): # boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) # # y1 >= 0 diff --git a/keras_csp/bbox_transform.pyc b/keras_csp/bbox_transform.pyc deleted file mode 100644 index bf6f91e..0000000 Binary files a/keras_csp/bbox_transform.pyc and /dev/null differ diff --git a/keras_csp/config.py b/keras_csp/config.py index f46b12f..c739c8a 100755 --- a/keras_csp/config.py +++ b/keras_csp/config.py @@ -1,27 +1,26 @@ - class Config(object): - def __init__(self): - self.gpu_ids = '0' - self.onegpu = 2 - self.num_epochs = 150 - self.add_epoch = 0 - self.iter_per_epoch = 2000 - self.init_lr = 1e-4 - self.alpha = 0.999 + def __init__(self): + self.gpu_ids = '0' + self.onegpu = 2 + self.num_epochs = 150 + self.add_epoch = 0 + self.iter_per_epoch = 2000 + self.init_lr = 1e-4 + self.alpha = 0.999 - # setting for network architechture - self.network = 'resnet50' # or 'mobilenet' - self.point = 'center' # or 'top', 'bottom - self.scale = 'h' # or 'w', 'hw' - self.num_scale = 1 # 1 for height (or width) prediction, 2 for height+width prediction - self.offset = False # append offset prediction or not - self.down = 4 # downsampling rate of the feature map for detection - self.radius = 2 # surrounding areas of positives for the scale map + # setting for network architechture + self.network = 'resnet50' # or 'mobilenet' + self.point = 'center' # or 'top', 'bottom + self.scale = 'h' # or 'w', 'hw' + self.num_scale = 1 # 1 for height (or width) prediction, 2 for height+width prediction + self.offset = False # append offset prediction or not + self.down = 4 # downsampling rate of the feature map for detection + self.radius = 2 # surrounding areas of positives for the scale map - # setting for data augmentation - self.use_horizontal_flips = True - self.brightness = (0.5, 2, 0.5) - self.size_train = (336, 448) + # setting for data augmentation + self.use_horizontal_flips = True + self.brightness = (0.5, 2) + self.size_train = (336, 448) - # image channel-wise mean to subtract, the order is BGR - self.img_channel_mean = [103.939, 116.779, 123.68] + # image channel-wise mean to subtract, the order is BGR + self.img_channel_mean = [103.939, 116.779, 123.68] diff --git a/keras_csp/config.pyc b/keras_csp/config.pyc deleted file mode 100644 index 4f122d0..0000000 Binary files a/keras_csp/config.pyc and /dev/null differ diff --git a/keras_csp/data_augment.py b/keras_csp/data_augment.py index 599d69b..247ab76 100644 --- a/keras_csp/data_augment.py +++ b/keras_csp/data_augment.py @@ -1,4 +1,4 @@ -from __future__ import division + import cv2 import numpy as np import copy @@ -85,20 +85,21 @@ def random_crop(image, gts, igs, crop_size, limit=8): return cropped_image, gts, igs + def random_pave(image, gts, igs, pave_size, limit=8): img_height, img_width = image.shape[0:2] pave_h, pave_w = pave_size # paved_image = np.zeros((pave_h, pave_w, 3), dtype=image.dtype) - paved_image = np.ones((pave_h, pave_w, 3), dtype=image.dtype)*np.mean(image,dtype=int) - pave_x = int(np.random.randint(0, pave_w-img_width+1)) - pave_y = int(np.random.randint(0, pave_h-img_height+1)) - paved_image[pave_y:pave_y+img_height, pave_x:pave_x+img_width] = image + paved_image = np.ones((pave_h, pave_w, 3), dtype=image.dtype) * np.mean(image, dtype=int) + pave_x = int(np.random.randint(0, pave_w - img_width + 1)) + pave_y = int(np.random.randint(0, pave_h - img_height + 1)) + paved_image[pave_y:pave_y + img_height, pave_x:pave_x + img_width] = image # pave detections if len(igs) > 0: igs[:, 0:4:2] += pave_x igs[:, 1:4:2] += pave_y - keep_inds = ((igs[:, 2] - igs[:, 0]) >=8) & \ - ((igs[:, 3] - igs[:, 1]) >=8) + keep_inds = ((igs[:, 2] - igs[:, 0]) >= 8) & \ + ((igs[:, 3] - igs[:, 1]) >= 8) igs = igs[keep_inds] if len(gts) > 0: diff --git a/keras_csp/data_augment.pyc b/keras_csp/data_augment.pyc deleted file mode 100644 index 5029909..0000000 Binary files a/keras_csp/data_augment.pyc and /dev/null differ diff --git a/keras_csp/data_generators.py b/keras_csp/data_generators.py index 37018b5..ad51a46 100644 --- a/keras_csp/data_generators.py +++ b/keras_csp/data_generators.py @@ -1,292 +1,302 @@ -from __future__ import absolute_import -from __future__ import division # import numpy as np # import cv2 import random from . import data_augment from .bbox_transform import * -def calc_gt_center(C, img_data,r=2, down=4,scale='h',offset=True): - def gaussian(kernel): - sigma = ((kernel-1) * 0.5 - 1) * 0.3 + 0.8 - s = 2*(sigma**2) - dx = np.exp(-np.square(np.arange(kernel) - int(kernel / 2)) / s) - return np.reshape(dx,(-1,1)) - gts = np.copy(img_data['bboxes']) - igs = np.copy(img_data['ignoreareas']) - scale_map = np.zeros((int(C.size_train[0]/down), int(C.size_train[1]/down), 2)) - if scale=='hw': - scale_map = np.zeros((int(C.size_train[0] / down), int(C.size_train[1] / down), 3)) - if offset: - offset_map = np.zeros((int(C.size_train[0] / down), int(C.size_train[1] / down), 3)) - seman_map = np.zeros((int(C.size_train[0]/down), int(C.size_train[1]/down), 3)) - seman_map[:,:,1] = 1 - if len(igs) > 0: - igs = igs/down - for ind in range(len(igs)): - x1,y1,x2,y2 = int(igs[ind,0]), int(igs[ind,1]), int(np.ceil(igs[ind,2])), int(np.ceil(igs[ind,3])) - seman_map[y1:y2, x1:x2,1] = 0 - if len(gts)>0: - gts = gts/down - for ind in range(len(gts)): - # x1, y1, x2, y2 = int(round(gts[ind, 0])), int(round(gts[ind, 1])), int(round(gts[ind, 2])), int(round(gts[ind, 3])) - x1, y1, x2, y2 = int(np.ceil(gts[ind, 0])), int(np.ceil(gts[ind, 1])), int(gts[ind, 2]), int(gts[ind, 3]) - c_x, c_y = int((gts[ind, 0] + gts[ind, 2]) / 2), int((gts[ind, 1] + gts[ind, 3]) / 2) - dx = gaussian(x2-x1) - dy = gaussian(y2-y1) - gau_map = np.multiply(dy, np.transpose(dx)) - seman_map[y1:y2, x1:x2,0] = np.maximum(seman_map[y1:y2, x1:x2,0], gau_map) - seman_map[y1:y2, x1:x2,1] = 1 - seman_map[c_y, c_x, 2] = 1 - - if scale == 'h': - scale_map[c_y-r:c_y+r+1, c_x-r:c_x+r+1, 0] = np.log(gts[ind, 3] - gts[ind, 1]) - scale_map[c_y-r:c_y+r+1, c_x-r:c_x+r+1, 1] = 1 - elif scale=='w': - scale_map[c_y-r:c_y+r+1, c_x-r:c_x+r+1, 0] = np.log(gts[ind, 2] - gts[ind, 0]) - scale_map[c_y-r:c_y+r+1, c_x-r:c_x+r+1, 1] = 1 - elif scale=='hw': - scale_map[c_y-r:c_y+r+1, c_x-r:c_x+r+1, 0] = np.log(gts[ind, 3] - gts[ind, 1]) - scale_map[c_y-r:c_y+r+1, c_x-r:c_x+r+1, 1] = np.log(gts[ind, 2] - gts[ind, 0]) - scale_map[c_y-r:c_y+r+1, c_x-r:c_x+r+1, 2] = 1 - if offset: - offset_map[c_y, c_x, 0] = (gts[ind, 1] + gts[ind, 3]) / 2 - c_y - 0.5 - offset_map[c_y, c_x, 1] = (gts[ind, 0] + gts[ind, 2]) / 2 - c_x - 0.5 - offset_map[c_y, c_x, 2] = 1 - - if offset: - return seman_map,scale_map,offset_map - else: - return seman_map, scale_map - -def calc_gt_top(C, img_data,r=2): - def gaussian(kernel): - sigma = ((kernel-1) * 0.5 - 1) * 0.3 + 0.8 - s = 2*(sigma**2) - dx = np.exp(-np.square(np.arange(kernel) - int(kernel / 2)) / s) - return np.reshape(dx,(-1,1)) - gts = np.copy(img_data['bboxes']) - igs = np.copy(img_data['ignoreareas']) - scale_map = np.zeros((int(C.size_train[0]/4), int(C.size_train[1]/4), 2)) - seman_map = np.zeros((int(C.size_train[0]/4), int(C.size_train[1]/4), 3)) - seman_map[:,:,1] = 1 - if len(igs) > 0: - igs = igs/4 - for ind in range(len(igs)): - x1,y1,x2,y2 = int(igs[ind,0]), int(igs[ind,1]), int(np.ceil(igs[ind,2])), int(np.ceil(igs[ind,3])) - seman_map[y1:y2, x1:x2,1] = 0 - if len(gts)>0: - gts = gts/4 - for ind in range(len(gts)): - x1, y1, x2, y2 = int(round(gts[ind, 0])), int(round(gts[ind, 1])), int(round(gts[ind, 2])), int(round(gts[ind, 3])) - w = x2-x1 - c_x = int((gts[ind, 0] + gts[ind, 2]) / 2) - - dx = gaussian(w) - dy = gaussian(w) - gau_map = np.multiply(dy, np.transpose(dx)) - - ty = np.maximum(0,int(round(y1-w/2))) - ot = ty-int(round(y1-w/2)) - seman_map[ty:ty+w-ot, x1:x2,0] = np.maximum(seman_map[ty:ty+w-ot, x1:x2,0], gau_map[ot:,:]) - seman_map[ty:ty+w-ot, x1:x2,1] = 1 - seman_map[y1, c_x, 2] = 1 - - scale_map[y1-r:y1+r+1, c_x-r:c_x+r+1, 0] = np.log(gts[ind,3]-gts[ind,1]) - scale_map[y1-r:y1+r+1, c_x-r:c_x+r+1, 1] = 1 - return seman_map,scale_map + +def calc_gt_center(C, img_data, r=2, down=4, scale='h', offset=True): + def gaussian(kernel): + sigma = ((kernel - 1) * 0.5 - 1) * 0.3 + 0.8 + s = 2 * (sigma ** 2) + dx = np.exp(-np.square(np.arange(kernel) - int(kernel / 2)) / s) + return np.reshape(dx, (-1, 1)) + + gts = np.copy(img_data['bboxes']) + igs = np.copy(img_data['ignoreareas']) + scale_map = np.zeros((int(C.size_train[0] / down), int(C.size_train[1] / down), 2)) + if scale == 'hw': + scale_map = np.zeros((int(C.size_train[0] / down), int(C.size_train[1] / down), 3)) + if offset: + offset_map = np.zeros((int(C.size_train[0] / down), int(C.size_train[1] / down), 3)) + seman_map = np.zeros((int(C.size_train[0] / down), int(C.size_train[1] / down), 3)) + seman_map[:, :, 1] = 1 + if len(igs) > 0: + igs = igs / down + for ind in range(len(igs)): + x1, y1, x2, y2 = int(igs[ind, 0]), int(igs[ind, 1]), int(np.ceil(igs[ind, 2])), int(np.ceil(igs[ind, 3])) + seman_map[y1:y2, x1:x2, 1] = 0 + if len(gts) > 0: + gts = gts / down + for ind in range(len(gts)): + # x1, y1, x2, y2 = int(round(gts[ind, 0])), int(round(gts[ind, 1])), int(round(gts[ind, 2])), int(round(gts[ind, 3])) + x1, y1, x2, y2 = int(np.ceil(gts[ind, 0])), int(np.ceil(gts[ind, 1])), int(gts[ind, 2]), int(gts[ind, 3]) + c_x, c_y = int((gts[ind, 0] + gts[ind, 2]) / 2), int((gts[ind, 1] + gts[ind, 3]) / 2) + dx = gaussian(x2 - x1) + dy = gaussian(y2 - y1) + gau_map = np.multiply(dy, np.transpose(dx)) + seman_map[y1:y2, x1:x2, 0] = np.maximum(seman_map[y1:y2, x1:x2, 0], gau_map) + seman_map[y1:y2, x1:x2, 1] = 1 + seman_map[c_y, c_x, 2] = 1 + + if scale == 'h': + scale_map[c_y - r:c_y + r + 1, c_x - r:c_x + r + 1, 0] = np.log(gts[ind, 3] - gts[ind, 1]) + scale_map[c_y - r:c_y + r + 1, c_x - r:c_x + r + 1, 1] = 1 + elif scale == 'w': + scale_map[c_y - r:c_y + r + 1, c_x - r:c_x + r + 1, 0] = np.log(gts[ind, 2] - gts[ind, 0]) + scale_map[c_y - r:c_y + r + 1, c_x - r:c_x + r + 1, 1] = 1 + elif scale == 'hw': + scale_map[c_y - r:c_y + r + 1, c_x - r:c_x + r + 1, 0] = np.log(gts[ind, 3] - gts[ind, 1]) + scale_map[c_y - r:c_y + r + 1, c_x - r:c_x + r + 1, 1] = np.log(gts[ind, 2] - gts[ind, 0]) + scale_map[c_y - r:c_y + r + 1, c_x - r:c_x + r + 1, 2] = 1 + if offset: + offset_map[c_y, c_x, 0] = (gts[ind, 1] + gts[ind, 3]) / 2 - c_y - 0.5 + offset_map[c_y, c_x, 1] = (gts[ind, 0] + gts[ind, 2]) / 2 - c_x - 0.5 + offset_map[c_y, c_x, 2] = 1 + + if offset: + return seman_map, scale_map, offset_map + else: + return seman_map, scale_map + + +def calc_gt_top(C, img_data, r=2): + def gaussian(kernel): + sigma = ((kernel - 1) * 0.5 - 1) * 0.3 + 0.8 + s = 2 * (sigma ** 2) + dx = np.exp(-np.square(np.arange(kernel) - int(kernel / 2)) / s) + return np.reshape(dx, (-1, 1)) + + gts = np.copy(img_data['bboxes']) + igs = np.copy(img_data['ignoreareas']) + scale_map = np.zeros((int(C.size_train[0] / 4), int(C.size_train[1] / 4), 2)) + seman_map = np.zeros((int(C.size_train[0] / 4), int(C.size_train[1] / 4), 3)) + seman_map[:, :, 1] = 1 + if len(igs) > 0: + igs = igs / 4 + for ind in range(len(igs)): + x1, y1, x2, y2 = int(igs[ind, 0]), int(igs[ind, 1]), int(np.ceil(igs[ind, 2])), int(np.ceil(igs[ind, 3])) + seman_map[y1:y2, x1:x2, 1] = 0 + if len(gts) > 0: + gts = gts / 4 + for ind in range(len(gts)): + x1, y1, x2, y2 = int(round(gts[ind, 0])), int(round(gts[ind, 1])), int(round(gts[ind, 2])), int( + round(gts[ind, 3])) + w = x2 - x1 + c_x = int((gts[ind, 0] + gts[ind, 2]) / 2) + + dx = gaussian(w) + dy = gaussian(w) + gau_map = np.multiply(dy, np.transpose(dx)) + + ty = np.maximum(0, int(round(y1 - w / 2))) + ot = ty - int(round(y1 - w / 2)) + seman_map[ty:ty + w - ot, x1:x2, 0] = np.maximum(seman_map[ty:ty + w - ot, x1:x2, 0], gau_map[ot:, :]) + seman_map[ty:ty + w - ot, x1:x2, 1] = 1 + seman_map[y1, c_x, 2] = 1 + + scale_map[y1 - r:y1 + r + 1, c_x - r:c_x + r + 1, 0] = np.log(gts[ind, 3] - gts[ind, 1]) + scale_map[y1 - r:y1 + r + 1, c_x - r:c_x + r + 1, 1] = 1 + return seman_map, scale_map + def calc_gt_bottom(C, img_data, r=2): - def gaussian(kernel): - sigma = ((kernel-1) * 0.5 - 1) * 0.3 + 0.8 - s = 2*(sigma**2) - dx = np.exp(-np.square(np.arange(kernel) - int(kernel / 2)) / s) - return np.reshape(dx,(-1,1)) - gts = np.copy(img_data['bboxes']) - igs = np.copy(img_data['ignoreareas']) - scale_map = np.zeros((int(C.size_train[0]/4), int(C.size_train[1]/4), 2)) - seman_map = np.zeros((int(C.size_train[0]/4), int(C.size_train[1]/4), 3)) - seman_map[:,:,1] = 1 - if len(igs) > 0: - igs = igs/4 - for ind in range(len(igs)): - x1,y1,x2,y2 = int(igs[ind,0]), int(igs[ind,1]), int(np.ceil(igs[ind,2])), int(np.ceil(igs[ind,3])) - seman_map[y1:y2, x1:x2,1] = 0 - if len(gts)>0: - gts = gts/4 - for ind in range(len(gts)): - x1, y1, x2, y2 = int(np.ceil(gts[ind, 0])), int(np.ceil(gts[ind, 1])), int(gts[ind, 2]), int(gts[ind, 3]) - y2 = np.minimum(int(C.random_crop[0] / 4) - 1, y2) - w = x2 - x1 - c_x = int((gts[ind, 0] + gts[ind, 2]) / 2) - dx = gaussian(w) - dy = gaussian(w) - gau_map = np.multiply(dy, np.transpose(dx)) - - by = np.minimum(int(C.random_crop[0]/4)-1, int(round(y2+w/2))) - ob = int(round(y2+w/2))-by - seman_map[by-w+ob:by, x1:x2, 0] = np.maximum(seman_map[by-w+ob:by, x1:x2, 0], gau_map[:w-ob, :]) - seman_map[by-w+ob:by, x1:x2, 1] = 1 - seman_map[y2, c_x, 2] = 1 - - scale_map[y2-r:y2+r+1, c_x-r:c_x+r+1, 0] = np.log(gts[ind,3]-gts[ind,1]) - scale_map[y2-r:y2+r+1, c_x-r:c_x+r+1, 1] = 1 - - return seman_map,scale_map - -def get_data(ped_data, C, batchsize = 8): - current_ped = 0 - while True: - x_img_batch, y_seman_batch, y_height_batch, y_offset_batch = [], [], [], [] - if current_ped>len(ped_data)-batchsize: - random.shuffle(ped_data) - current_ped = 0 - for img_data in ped_data[current_ped:current_ped + batchsize]: - try: - img_data, x_img = data_augment.augment(img_data, C) - if C.offset: - y_seman, y_height, y_offset = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=True) - else: - if C.point == 'top': - y_seman, y_height = calc_gt_top(C, img_data) - elif C.point == 'bottom': - y_seman, y_height = calc_gt_bottom(C, img_data) - else: - y_seman, y_height = calc_gt_center(C, img_data,down=C.down, scale=C.scale, offset=False) - - x_img = x_img.astype(np.float32) - x_img[:, :, 0] -= C.img_channel_mean[0] - x_img[:, :, 1] -= C.img_channel_mean[1] - x_img[:, :, 2] -= C.img_channel_mean[2] - - x_img_batch.append(np.expand_dims(x_img, axis=0)) - y_seman_batch.append(np.expand_dims(y_seman, axis=0)) - y_height_batch.append(np.expand_dims(y_height, axis=0)) - if C.offset: - y_offset_batch.append(np.expand_dims(y_offset, axis=0)) - except Exception as e: - print ('get_batch_gt:',e) - x_img_batch = np.concatenate(x_img_batch,axis=0) - y_seman_batch = np.concatenate(y_seman_batch, axis=0) - y_height_batch = np.concatenate(y_height_batch, axis=0) - if C.offset: - y_offset_batch = np.concatenate(y_offset_batch, axis=0) - current_ped += batchsize - if C.offset: - yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch), np.copy(y_offset_batch)] - else: - yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch)] - -def get_data_hybrid(ped_data, emp_data, C, batchsize = 8,hyratio=0.5): - current_ped = 0 - current_emp = 0 - batchsize_ped = int(batchsize * hyratio) - batchsize_emp = batchsize - batchsize_ped - while True: - x_img_batch, y_seman_batch, y_height_batch, y_offset_batch = [], [], [], [] - if current_ped>len(ped_data)-batchsize_ped: - random.shuffle(ped_data) - current_ped = 0 - if current_emp>len(emp_data)-batchsize_emp: - random.shuffle(emp_data) - current_emp = 0 - for img_data in ped_data[current_ped:current_ped + batchsize_ped]: - try: - img_data, x_img = data_augment.augment(img_data, C) - if C.offset: - y_seman, y_height, y_offset = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=C.offset) - else: - if C.point == 'top': - y_seman, y_height = calc_gt_top(C, img_data) - elif C.point == 'bottom': - y_seman, y_height = calc_gt_bottom(C, img_data) - else: - y_seman, y_height = calc_gt_center(C, img_data,down=C.down, scale=C.scale, offset=False) - - x_img = x_img.astype(np.float32) - x_img[:, :, 0] -= C.img_channel_mean[0] - x_img[:, :, 1] -= C.img_channel_mean[1] - x_img[:, :, 2] -= C.img_channel_mean[2] - - x_img_batch.append(np.expand_dims(x_img, axis=0)) - y_seman_batch.append(np.expand_dims(y_seman, axis=0)) - y_height_batch.append(np.expand_dims(y_height, axis=0)) - if C.offset: - y_offset_batch.append(np.expand_dims(y_offset, axis=0)) - - except Exception as e: - print ('get_batch_gt:',e) - for img_data in emp_data[current_emp:current_emp + batchsize_emp]: - try: - img_data, x_img = data_augment.augment(img_data, C) - if C.offset: - y_seman, y_height, y_offset = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=C.offset) - else: - if C.point == 'top': - y_seman, y_height = calc_gt_top(C, img_data) - elif C.point == 'bottom': - y_seman, y_height = calc_gt_bottom(C, img_data) - else: - y_seman, y_height = calc_gt_center(C, img_data,down=C.down, scale=C.scale, offset=False) - - x_img = x_img.astype(np.float32) - x_img[:, :, 0] -= C.img_channel_mean[0] - x_img[:, :, 1] -= C.img_channel_mean[1] - x_img[:, :, 2] -= C.img_channel_mean[2] - - x_img_batch.append(np.expand_dims(x_img, axis=0)) - y_seman_batch.append(np.expand_dims(y_seman, axis=0)) - y_height_batch.append(np.expand_dims(y_height, axis=0)) - if C.offset: - y_offset_batch.append(np.expand_dims(y_offset, axis=0)) - except Exception as e: - print ('get_batch_gt_emp:',e) - x_img_batch = np.concatenate(x_img_batch,axis=0) - y_seman_batch = np.concatenate(y_seman_batch, axis=0) - y_height_batch = np.concatenate(y_height_batch, axis=0) - if C.offset: - y_offset_batch = np.concatenate(y_offset_batch, axis=0) - current_ped += batchsize_ped - current_emp += batchsize_emp - if C.offset: - yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch), np.copy(y_offset_batch)] - else: - yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch)] - -def get_data_wider(ped_data, C, batchsize = 8): - current_ped = 0 - while True: - x_img_batch, y_seman_batch, y_height_batch, y_offset_batch = [], [], [], [] - if current_ped>len(ped_data)-batchsize: - random.shuffle(ped_data) - current_ped = 0 - for img_data in ped_data[current_ped:current_ped + batchsize]: - try: - img_data, x_img = data_augment.augment_wider(img_data, C) - if C.offset: - y_seman, y_height, y_offset = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=True) - else: - y_seman, y_height = calc_gt_center(C, img_data,down=C.down, scale=C.scale, offset=False) - - x_img = x_img.astype(np.float32) - x_img[:, :, 0] -= C.img_channel_mean[0] - x_img[:, :, 1] -= C.img_channel_mean[1] - x_img[:, :, 2] -= C.img_channel_mean[2] - - x_img_batch.append(np.expand_dims(x_img, axis=0)) - y_seman_batch.append(np.expand_dims(y_seman, axis=0)) - y_height_batch.append(np.expand_dims(y_height, axis=0)) - if C.offset: - y_offset_batch.append(np.expand_dims(y_offset, axis=0)) - except Exception as e: - print ('get_batch_gt:',e) - x_img_batch = np.concatenate(x_img_batch,axis=0) - y_seman_batch = np.concatenate(y_seman_batch, axis=0) - y_height_batch = np.concatenate(y_height_batch, axis=0) - if C.offset: - y_offset_batch = np.concatenate(y_offset_batch, axis=0) - current_ped += batchsize - if C.offset: - yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch), np.copy(y_offset_batch)] - else: - yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch)] + def gaussian(kernel): + sigma = ((kernel - 1) * 0.5 - 1) * 0.3 + 0.8 + s = 2 * (sigma ** 2) + dx = np.exp(-np.square(np.arange(kernel) - int(kernel / 2)) / s) + return np.reshape(dx, (-1, 1)) + + gts = np.copy(img_data['bboxes']) + igs = np.copy(img_data['ignoreareas']) + scale_map = np.zeros((int(C.size_train[0] / 4), int(C.size_train[1] / 4), 2)) + seman_map = np.zeros((int(C.size_train[0] / 4), int(C.size_train[1] / 4), 3)) + seman_map[:, :, 1] = 1 + if len(igs) > 0: + igs = igs / 4 + for ind in range(len(igs)): + x1, y1, x2, y2 = int(igs[ind, 0]), int(igs[ind, 1]), int(np.ceil(igs[ind, 2])), int(np.ceil(igs[ind, 3])) + seman_map[y1:y2, x1:x2, 1] = 0 + if len(gts) > 0: + gts = gts / 4 + for ind in range(len(gts)): + x1, y1, x2, y2 = int(np.ceil(gts[ind, 0])), int(np.ceil(gts[ind, 1])), int(gts[ind, 2]), int(gts[ind, 3]) + y2 = np.minimum(int(C.random_crop[0] / 4) - 1, y2) + w = x2 - x1 + c_x = int((gts[ind, 0] + gts[ind, 2]) / 2) + dx = gaussian(w) + dy = gaussian(w) + gau_map = np.multiply(dy, np.transpose(dx)) + + by = np.minimum(int(C.random_crop[0] / 4) - 1, int(round(y2 + w / 2))) + ob = int(round(y2 + w / 2)) - by + seman_map[by - w + ob:by, x1:x2, 0] = np.maximum(seman_map[by - w + ob:by, x1:x2, 0], gau_map[:w - ob, :]) + seman_map[by - w + ob:by, x1:x2, 1] = 1 + seman_map[y2, c_x, 2] = 1 + + scale_map[y2 - r:y2 + r + 1, c_x - r:c_x + r + 1, 0] = np.log(gts[ind, 3] - gts[ind, 1]) + scale_map[y2 - r:y2 + r + 1, c_x - r:c_x + r + 1, 1] = 1 + + return seman_map, scale_map + + +def get_data(ped_data, C, batchsize=8): + current_ped = 0 + while True: + x_img_batch, y_seman_batch, y_height_batch, y_offset_batch = [], [], [], [] + if current_ped > len(ped_data) - batchsize: + random.shuffle(ped_data) + current_ped = 0 + for img_data in ped_data[current_ped:current_ped + batchsize]: + try: + img_data, x_img = data_augment.augment(img_data, C) + if C.offset: + y_seman, y_height, y_offset = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=True) + else: + if C.point == 'top': + y_seman, y_height = calc_gt_top(C, img_data) + elif C.point == 'bottom': + y_seman, y_height = calc_gt_bottom(C, img_data) + else: + y_seman, y_height = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=False) + + x_img = x_img.astype(np.float32) + x_img[:, :, 0] -= C.img_channel_mean[0] + x_img[:, :, 1] -= C.img_channel_mean[1] + x_img[:, :, 2] -= C.img_channel_mean[2] + + x_img_batch.append(np.expand_dims(x_img, axis=0)) + y_seman_batch.append(np.expand_dims(y_seman, axis=0)) + y_height_batch.append(np.expand_dims(y_height, axis=0)) + if C.offset: + y_offset_batch.append(np.expand_dims(y_offset, axis=0)) + except Exception as e: + print(('get_batch_gt:', e)) + x_img_batch = np.concatenate(x_img_batch, axis=0) + y_seman_batch = np.concatenate(y_seman_batch, axis=0) + y_height_batch = np.concatenate(y_height_batch, axis=0) + if C.offset: + y_offset_batch = np.concatenate(y_offset_batch, axis=0) + current_ped += batchsize + if C.offset: + yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch), np.copy(y_offset_batch)] + else: + yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch)] + + +def get_data_hybrid(ped_data, emp_data, C, batchsize=8, hyratio=0.5): + current_ped = 0 + current_emp = 0 + batchsize_ped = int(batchsize * hyratio) + batchsize_emp = batchsize - batchsize_ped + while True: + x_img_batch, y_seman_batch, y_height_batch, y_offset_batch = [], [], [], [] + if current_ped > len(ped_data) - batchsize_ped: + random.shuffle(ped_data) + current_ped = 0 + if current_emp > len(emp_data) - batchsize_emp: + random.shuffle(emp_data) + current_emp = 0 + for img_data in ped_data[current_ped:current_ped + batchsize_ped]: + try: + img_data, x_img = data_augment.augment(img_data, C) + if C.offset: + y_seman, y_height, y_offset = calc_gt_center(C, img_data, down=C.down, scale=C.scale, + offset=C.offset) + else: + if C.point == 'top': + y_seman, y_height = calc_gt_top(C, img_data) + elif C.point == 'bottom': + y_seman, y_height = calc_gt_bottom(C, img_data) + else: + y_seman, y_height = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=False) + + x_img = x_img.astype(np.float32) + x_img[:, :, 0] -= C.img_channel_mean[0] + x_img[:, :, 1] -= C.img_channel_mean[1] + x_img[:, :, 2] -= C.img_channel_mean[2] + + x_img_batch.append(np.expand_dims(x_img, axis=0)) + y_seman_batch.append(np.expand_dims(y_seman, axis=0)) + y_height_batch.append(np.expand_dims(y_height, axis=0)) + if C.offset: + y_offset_batch.append(np.expand_dims(y_offset, axis=0)) + + except Exception as e: + print(('get_batch_gt:', e)) + for img_data in emp_data[current_emp:current_emp + batchsize_emp]: + try: + img_data, x_img = data_augment.augment(img_data, C) + if C.offset: + y_seman, y_height, y_offset = calc_gt_center(C, img_data, down=C.down, scale=C.scale, + offset=C.offset) + else: + if C.point == 'top': + y_seman, y_height = calc_gt_top(C, img_data) + elif C.point == 'bottom': + y_seman, y_height = calc_gt_bottom(C, img_data) + else: + y_seman, y_height = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=False) + + x_img = x_img.astype(np.float32) + x_img[:, :, 0] -= C.img_channel_mean[0] + x_img[:, :, 1] -= C.img_channel_mean[1] + x_img[:, :, 2] -= C.img_channel_mean[2] + + x_img_batch.append(np.expand_dims(x_img, axis=0)) + y_seman_batch.append(np.expand_dims(y_seman, axis=0)) + y_height_batch.append(np.expand_dims(y_height, axis=0)) + if C.offset: + y_offset_batch.append(np.expand_dims(y_offset, axis=0)) + except Exception as e: + print(('get_batch_gt_emp:', e)) + x_img_batch = np.concatenate(x_img_batch, axis=0) + y_seman_batch = np.concatenate(y_seman_batch, axis=0) + y_height_batch = np.concatenate(y_height_batch, axis=0) + if C.offset: + y_offset_batch = np.concatenate(y_offset_batch, axis=0) + current_ped += batchsize_ped + current_emp += batchsize_emp + if C.offset: + yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch), np.copy(y_offset_batch)] + else: + yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch)] + + +def get_data_wider(ped_data, C, batchsize=8): + current_ped = 0 + while True: + x_img_batch, y_seman_batch, y_height_batch, y_offset_batch = [], [], [], [] + if current_ped > len(ped_data) - batchsize: + random.shuffle(ped_data) + current_ped = 0 + for img_data in ped_data[current_ped:current_ped + batchsize]: + try: + img_data, x_img = data_augment.augment_wider(img_data, C) + if C.offset: + y_seman, y_height, y_offset = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=True) + else: + y_seman, y_height = calc_gt_center(C, img_data, down=C.down, scale=C.scale, offset=False) + + x_img = x_img.astype(np.float32) + x_img[:, :, 0] -= C.img_channel_mean[0] + x_img[:, :, 1] -= C.img_channel_mean[1] + x_img[:, :, 2] -= C.img_channel_mean[2] + + x_img_batch.append(np.expand_dims(x_img, axis=0)) + y_seman_batch.append(np.expand_dims(y_seman, axis=0)) + y_height_batch.append(np.expand_dims(y_height, axis=0)) + if C.offset: + y_offset_batch.append(np.expand_dims(y_offset, axis=0)) + except Exception as e: + print(('get_batch_gt:', e)) + x_img_batch = np.concatenate(x_img_batch, axis=0) + y_seman_batch = np.concatenate(y_seman_batch, axis=0) + y_height_batch = np.concatenate(y_height_batch, axis=0) + if C.offset: + y_offset_batch = np.concatenate(y_offset_batch, axis=0) + current_ped += batchsize + if C.offset: + yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch), np.copy(y_offset_batch)] + else: + yield np.copy(x_img_batch), [np.copy(y_seman_batch), np.copy(y_height_batch)] diff --git a/keras_csp/data_generators.pyc b/keras_csp/data_generators.pyc deleted file mode 100644 index dbe5135..0000000 Binary files a/keras_csp/data_generators.pyc and /dev/null differ diff --git a/keras_csp/keras_layer_L2Normalization.py b/keras_csp/keras_layer_L2Normalization.py index 5f1b9c2..7b15e33 100644 --- a/keras_csp/keras_layer_L2Normalization.py +++ b/keras_csp/keras_layer_L2Normalization.py @@ -22,6 +22,7 @@ from keras.engine.topology import Layer import numpy as np + class L2Normalization(Layer): ''' Performs L2 normalization on the input tensor with a learnable scaling parameter diff --git a/keras_csp/keras_layer_L2Normalization.pyc b/keras_csp/keras_layer_L2Normalization.pyc deleted file mode 100644 index f1b924b..0000000 Binary files a/keras_csp/keras_layer_L2Normalization.pyc and /dev/null differ diff --git a/keras_csp/losses.py b/keras_csp/losses.py index 499994d..1e870dd 100644 --- a/keras_csp/losses.py +++ b/keras_csp/losses.py @@ -2,63 +2,65 @@ from keras.objectives import categorical_crossentropy if K.image_dim_ordering() == 'tf': - import tensorflow as tf + import tensorflow as tf epsilon = 1e-4 def cls_center(y_true, y_pred): + classification_loss = K.binary_crossentropy(y_pred[:, :, :, 0], y_true[:, :, :, 2]) + # firstly we compute the focal weight + positives = y_true[:, :, :, 2] + negatives = y_true[:, :, :, 1] - y_true[:, :, :, 2] + foreground_weight = positives * (1.0 - y_pred[:, :, :, 0]) ** 2.0 + # foreground_weight = positives + background_weight = negatives * ((1.0 - y_true[:, :, :, 0]) ** 4.0) * (y_pred[:, :, :, 0] ** 2.0) + # background_weight = negatives * ((1.0 - y_true[:, :, :, 0])**4.0)*(0.01 ** 2.0) - classification_loss = K.binary_crossentropy(y_pred[:, :, :, 0], y_true[:, :, :, 2]) - # firstly we compute the focal weight - positives = y_true[:, :, :, 2] - negatives = y_true[:, :, :, 1]-y_true[:, :, :, 2] - foreground_weight = positives * (1.0 - y_pred[:, :, :, 0]) ** 2.0 - # foreground_weight = positives - background_weight = negatives * ((1.0 - y_true[:, :, :, 0])**4.0)*(y_pred[:, :, :, 0] ** 2.0) - # background_weight = negatives * ((1.0 - y_true[:, :, :, 0])**4.0)*(0.01 ** 2.0) + # foreground_weight = y_true[:, :, :, 0] * (1- y_pred[:, :, :, 0]) ** 2.0 + # background_weight = negatives * y_pred[:, :, :, 0] ** 2.0 - # foreground_weight = y_true[:, :, :, 0] * (1- y_pred[:, :, :, 0]) ** 2.0 - # background_weight = negatives * y_pred[:, :, :, 0] ** 2.0 + focal_weight = foreground_weight + background_weight - focal_weight = foreground_weight + background_weight + assigned_boxes = tf.reduce_sum(y_true[:, :, :, 2]) + class_loss = 0.01 * tf.reduce_sum(focal_weight * classification_loss) / tf.maximum(1.0, assigned_boxes) - assigned_boxes = tf.reduce_sum(y_true[:, :, :, 2]) - class_loss = 0.01*tf.reduce_sum(focal_weight*classification_loss) / tf.maximum(1.0, assigned_boxes) + # assigned_boxes = tf.reduce_sum(tf.reduce_sum(y_true[:, :, :, 1], axis=-1), axis=-1) + # class_loss = tf.reduce_sum(tf.reduce_sum(classification_loss, axis=-1), axis=-1) / tf.maximum(1.0, assigned_boxes) - # assigned_boxes = tf.reduce_sum(tf.reduce_sum(y_true[:, :, :, 1], axis=-1), axis=-1) - # class_loss = tf.reduce_sum(tf.reduce_sum(classification_loss, axis=-1), axis=-1) / tf.maximum(1.0, assigned_boxes) + return class_loss - return class_loss def regr_h(y_true, y_pred): + absolute_loss = tf.abs(y_true[:, :, :, 0] - y_pred[:, :, :, 0]) / (y_true[:, :, :, 0] + 1e-10) + square_loss = 0.5 * ((y_true[:, :, :, 0] - y_pred[:, :, :, 0]) / (y_true[:, :, :, 0] + 1e-10)) ** 2 - absolute_loss = tf.abs(y_true[:, :, :, 0] - y_pred[:, :, :, 0])/(y_true[:, :, :, 0]+1e-10) - square_loss = 0.5 * ((y_true[:, :, :, 0] - y_pred[:, :, :, 0])/(y_true[:, :, :, 0]+1e-10)) ** 2 + l1_loss = y_true[:, :, :, 1] * tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5) - l1_loss = y_true[:, :, :, 1]*tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5) + assigned_boxes = tf.reduce_sum(y_true[:, :, :, 1]) + class_loss = tf.reduce_sum(l1_loss) / tf.maximum(1.0, assigned_boxes) - assigned_boxes = tf.reduce_sum(y_true[:, :, :, 1]) - class_loss = tf.reduce_sum(l1_loss) / tf.maximum(1.0, assigned_boxes) + return class_loss - return class_loss def regr_hw(y_true, y_pred): - absolute_loss = tf.abs(y_true[:, :, :, :2] - y_pred[:, :, :, :]) / (y_true[:, :, :, :2] + 1e-10) - square_loss = 0.5 * ((y_true[:, :, :, :2] - y_pred[:, :, :, :]) / (y_true[:, :, :, :2] + 1e-10)) ** 2 - loss = y_true[:, :, :, 2] * tf.reduce_sum(tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5),axis=-1) - assigned_boxes = tf.reduce_sum(y_true[:, :, :, 2]) - class_loss = tf.reduce_sum(loss) / tf.maximum(1.0, assigned_boxes) + absolute_loss = tf.abs(y_true[:, :, :, :2] - y_pred[:, :, :, :]) / (y_true[:, :, :, :2] + 1e-10) + square_loss = 0.5 * ((y_true[:, :, :, :2] - y_pred[:, :, :, :]) / (y_true[:, :, :, :2] + 1e-10)) ** 2 + loss = y_true[:, :, :, 2] * tf.reduce_sum(tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5), + axis=-1) + assigned_boxes = tf.reduce_sum(y_true[:, :, :, 2]) + class_loss = tf.reduce_sum(loss) / tf.maximum(1.0, assigned_boxes) - return class_loss + return class_loss -def regr_offset(y_true, y_pred): - absolute_loss = tf.abs(y_true[:, :, :, :2] - y_pred[:, :, :, :]) - square_loss = 0.5 * (y_true[:, :, :, :2] - y_pred[:, :, :, :]) ** 2 - l1_loss = y_true[:, :, :, 2] * tf.reduce_sum(tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5), axis=-1) +def regr_offset(y_true, y_pred): + absolute_loss = tf.abs(y_true[:, :, :, :2] - y_pred[:, :, :, :]) + square_loss = 0.5 * (y_true[:, :, :, :2] - y_pred[:, :, :, :]) ** 2 + l1_loss = y_true[:, :, :, 2] * tf.reduce_sum( + tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5), axis=-1) - assigned_boxes = tf.reduce_sum(y_true[:, :, :, 2]) - class_loss = 0.1*tf.reduce_sum(l1_loss) / tf.maximum(1.0, assigned_boxes) + assigned_boxes = tf.reduce_sum(y_true[:, :, :, 2]) + class_loss = 0.1 * tf.reduce_sum(l1_loss) / tf.maximum(1.0, assigned_boxes) - return class_loss + return class_loss diff --git a/keras_csp/losses.pyc b/keras_csp/losses.pyc deleted file mode 100644 index 47003c5..0000000 Binary files a/keras_csp/losses.pyc and /dev/null differ diff --git a/keras_csp/mobilenet.py b/keras_csp/mobilenet.py index d5b936d..e8855f8 100644 --- a/keras_csp/mobilenet.py +++ b/keras_csp/mobilenet.py @@ -1,6 +1,6 @@ -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division + + + from keras.layers import * from keras import backend as K import numpy as np @@ -477,12 +477,13 @@ def nn_p2p3p4p5(img_input=None, alpha=1.0, depth_multiplier=1, trainable=True): return [x_class, x_regr] + # focal loss like def prior_probability_onecls(num_class=1, probability=0.01): - def f(shape, dtype=keras.backend.floatx()): - assert(shape[0] % num_class == 0) - # set bias to -log((1 - p)/p) for foregound - result = np.ones(shape, dtype=dtype) * -math.log((1 - probability) / probability) - # set bias to -log(p/(1 - p)) for background - return result - return f + def f(shape, dtype=keras.backend.floatx()): + assert(shape[0] % num_class == 0) + # set bias to -log((1 - p)/p) for foregound + result = np.ones(shape, dtype=dtype) * -math.log((1 - probability) / probability) + # set bias to -log(p/(1 - p)) for background + return result + return f diff --git a/keras_csp/mobilenet.pyc b/keras_csp/mobilenet.pyc deleted file mode 100644 index 71946eb..0000000 Binary files a/keras_csp/mobilenet.pyc and /dev/null differ diff --git a/keras_csp/nms/__init__.pyc b/keras_csp/nms/__init__.pyc deleted file mode 100644 index f3d0bba..0000000 Binary files a/keras_csp/nms/__init__.pyc and /dev/null differ diff --git a/keras_csp/nms_wrapper.py b/keras_csp/nms_wrapper.py index c34cb15..7bde7d0 100644 --- a/keras_csp/nms_wrapper.py +++ b/keras_csp/nms_wrapper.py @@ -4,9 +4,9 @@ # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- - -from nms.gpu_nms import gpu_nms -from nms.cpu_nms import cpu_nms +import pyximport; pyximport.install() +from keras_csp.nms.gpu_nms import gpu_nms +from keras_csp.nms.cpu_nms import cpu_nms import numpy as np def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.001, method=1): diff --git a/keras_csp/nms_wrapper.pyc b/keras_csp/nms_wrapper.pyc deleted file mode 100644 index 7dd5db0..0000000 Binary files a/keras_csp/nms_wrapper.pyc and /dev/null differ diff --git a/keras_csp/parallel_model.py b/keras_csp/parallel_model.py index d5d42b2..01c7af8 100644 --- a/keras_csp/parallel_model.py +++ b/keras_csp/parallel_model.py @@ -71,8 +71,8 @@ def make_parallel(self): with tf.device('/gpu:%d' % i): with tf.name_scope('tower_%d' % i): # Run a slice of inputs through this replica - zipped_inputs = zip(self.inner_model.input_names, - self.inner_model.inputs) + zipped_inputs = list(zip(self.inner_model.input_names, + self.inner_model.inputs)) inputs = [ KL.Lambda(lambda s: input_slices[name][i], output_shape=lambda s: (None,)+s[1:])(tensor) @@ -146,8 +146,8 @@ def build_model(x_train, num_classes): x_train = np.expand_dims(x_train, -1).astype('float32') / 255 x_test = np.expand_dims(x_test, -1).astype('float32') / 255 - print('x_train shape:', x_train.shape) - print('x_test shape:', x_test.shape) + print(('x_train shape:', x_train.shape)) + print(('x_test shape:', x_test.shape)) # Build data generator and model datagen = ImageDataGenerator() diff --git a/keras_csp/parallel_model.pyc b/keras_csp/parallel_model.pyc deleted file mode 100644 index f2ff41b..0000000 Binary files a/keras_csp/parallel_model.pyc and /dev/null differ diff --git a/keras_csp/resnet50.py b/keras_csp/resnet50.py index 64591fc..037e60d 100644 --- a/keras_csp/resnet50.py +++ b/keras_csp/resnet50.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division + + + from keras.layers import * from keras import backend as K from .keras_layer_L2Normalization import L2Normalization diff --git a/keras_csp/resnet50.pyc b/keras_csp/resnet50.pyc deleted file mode 100644 index c11bf96..0000000 Binary files a/keras_csp/resnet50.pyc and /dev/null differ diff --git a/keras_csp/utilsfunc.py b/keras_csp/utilsfunc.py index ce05628..127595c 100644 --- a/keras_csp/utilsfunc.py +++ b/keras_csp/utilsfunc.py @@ -1,71 +1,78 @@ -from __future__ import division import cv2 import numpy as np + def format_img_size(img, C): - """ formats the image size based on config """ - img_min_side = float(C.im_size) - (height,width,_) = img.shape - - if width <= height: - ratio = img_min_side/width - new_height = int(ratio * height) - new_width = int(img_min_side) - else: - ratio = img_min_side/height - new_width = int(ratio * width) - new_height = int(img_min_side) - img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) - return img, ratio + """ formats the image size based on config """ + img_min_side = float(C.im_size) + (height, width, _) = img.shape + + if width <= height: + ratio = img_min_side / width + new_height = int(ratio * height) + new_width = int(img_min_side) + else: + ratio = img_min_side / height + new_width = int(ratio * width) + new_height = int(img_min_side) + img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) + return img, ratio + + def format_img_channels(img, C): - """ formats the image channels based on config """ - # img = img[:, :, (2, 1, 0)] - img = img.astype(np.float32) - img[:, :, 0] -= C.img_channel_mean[0] - img[:, :, 1] -= C.img_channel_mean[1] - img[:, :, 2] -= C.img_channel_mean[2] - # img /= C.img_scaling_factor - # img = np.transpose(img, (2, 0, 1)) - img = np.expand_dims(img, axis=0) - return img + """ formats the image channels based on config """ + # img = img[:, :, (2, 1, 0)] + img = img.astype(np.float32) + img[:, :, 0] -= C.img_channel_mean[0] + img[:, :, 1] -= C.img_channel_mean[1] + img[:, :, 2] -= C.img_channel_mean[2] + # img /= C.img_scaling_factor + # img = np.transpose(img, (2, 0, 1)) + img = np.expand_dims(img, axis=0) + return img + def format_img_batch(img, C): - """ formats the image channels based on config """ - # img = img[:, :, (2, 1, 0)] - img = img.astype(np.float32) - img[:, :, :, 0] -= C.img_channel_mean[0] - img[:, :, :, 1] -= C.img_channel_mean[1] - img[:, :, :, 2] -= C.img_channel_mean[2] - # img /= C.img_scaling_factor - # img = np.transpose(img, (2, 0, 1)) - # img = np.expand_dims(img, axis=0) - return img + """ formats the image channels based on config """ + # img = img[:, :, (2, 1, 0)] + img = img.astype(np.float32) + img[:, :, :, 0] -= C.img_channel_mean[0] + img[:, :, :, 1] -= C.img_channel_mean[1] + img[:, :, :, 2] -= C.img_channel_mean[2] + # img /= C.img_scaling_factor + # img = np.transpose(img, (2, 0, 1)) + # img = np.expand_dims(img, axis=0) + return img + def format_img(img, C): - """ formats an image for model prediction based on config """ - # img, ratio = format_img_size(img, C) - img = format_img_channels(img, C) - return img #return img, ratio + """ formats an image for model prediction based on config """ + # img, ratio = format_img_size(img, C) + img = format_img_channels(img, C) + return img # return img, ratio + def format_img_inria(img, C): - img_h, img_w = img.shape[:2] - # img_h_new, img_w_new = int(round(img_h/16)*16), int(round(img_w/16)*16) - # img = cv2.resize(img, (img_w_new, img_h_new)) - img_h_new, img_w_new = int(np.ceil(img_h/16)*16), int(np.ceil(img_w/16)*16) - paved_image = np.zeros((img_h_new, img_w_new, 3), dtype=img.dtype) - paved_image[0:img_h,0:img_w] = img - img = format_img_channels(paved_image, C) - return img + img_h, img_w = img.shape[:2] + # img_h_new, img_w_new = int(round(img_h/16)*16), int(round(img_w/16)*16) + # img = cv2.resize(img, (img_w_new, img_h_new)) + img_h_new, img_w_new = int(np.ceil(img_h / 16) * 16), int(np.ceil(img_w / 16) * 16) + paved_image = np.zeros((img_h_new, img_w_new, 3), dtype=img.dtype) + paved_image[0:img_h, 0:img_w] = img + img = format_img_channels(paved_image, C) + return img + def format_img_ratio(img, C, ratio): - img = img.astype(np.float32) - img[:, :, 0] -= C.img_channel_mean[0] - img[:, :, 1] -= C.img_channel_mean[1] - img[:, :, 2] -= C.img_channel_mean[2] - img = cv2.resize(img, None, None, fx=ratio, fy=ratio) - # img = cv2.resize(img, None, None, fx=ratio, fy=ratio, interpolation=cv2.INTER_CUBIC) - img = np.expand_dims(img, axis=0) - return img #return img, ratio + img = img.astype(np.float32) + img[:, :, 0] -= C.img_channel_mean[0] + img[:, :, 1] -= C.img_channel_mean[1] + img[:, :, 2] -= C.img_channel_mean[2] + img = cv2.resize(img, None, None, fx=ratio, fy=ratio) + # img = cv2.resize(img, None, None, fx=ratio, fy=ratio, interpolation=cv2.INTER_CUBIC) + img = np.expand_dims(img, axis=0) + return img # return img, ratio + def preprocess_input_test(x): x = x.astype(np.float32) @@ -75,24 +82,26 @@ def preprocess_input_test(x): x = np.expand_dims(x, axis=0) return x + # Method to transform the coordinates of the bounding box to its original size def get_real_coordinates(ratio, x1, y1, x2, y2): + real_x1 = int(round(x1 // ratio)) + real_y1 = int(round(y1 // ratio)) + real_x2 = int(round(x2 // ratio)) + real_y2 = int(round(y2 // ratio)) - real_x1 = int(round(x1 // ratio)) - real_y1 = int(round(y1 // ratio)) - real_x2 = int(round(x2 // ratio)) - real_y2 = int(round(y2 // ratio)) + return (real_x1, real_y1, real_x2, real_y2) - return (real_x1, real_y1, real_x2 ,real_y2) def intersection(ai, bi, area): - x = max(ai[0], bi[0]) - y = max(ai[1], bi[1]) - w = min(ai[2], bi[2]) - x - h = min(ai[3], bi[3]) - y - if w < 0 or h < 0: - return 0 - return w*h/area + x = max(ai[0], bi[0]) + y = max(ai[1], bi[1]) + w = min(ai[2], bi[2]) - x + h = min(ai[3], bi[3]) - y + if w < 0 or h < 0: + return 0 + return w * h / area + def box_grid_overlap(bboxes, get_img_output_length): width, height = 960, 540 @@ -109,7 +118,9 @@ def box_grid_overlap(bboxes, get_img_output_length): if num_bboxes > 0: # get the GT box coordinates, and resize to account for image resizing gta = np.zeros((num_bboxes, 4)) - gta[:,0],gta[:,1],gta[:,2],gta[:,3] = bboxes[:,0],bboxes[:,1],bboxes[:,2]+bboxes[:,0],bboxes[:,3]+bboxes[:,1] + gta[:, 0], gta[:, 1], gta[:, 2], gta[:, 3] = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2] + bboxes[:, 0], bboxes[:, + 3] + bboxes[ + :, 1] # for bbox_num in range(num_bboxes): # # get the GT box coordinates, and resize to account for image resizing # gta[bbox_num, 0] = bboxes[bbox_num][0] @@ -124,62 +135,66 @@ def box_grid_overlap(bboxes, get_img_output_length): y2_anc = downscale * (jy + 1) best_op = 0 for b in range(num_bboxes): - grid = [x1_anc,y1_anc,x2_anc,y2_anc] - op = intersection(grid,gta[b,:],downscale**2) - best_op = op if op>best_op else best_op - y_grid_overlap[jy,ix] = best_op + grid = [x1_anc, y1_anc, x2_anc, y2_anc] + op = intersection(grid, gta[b, :], downscale ** 2) + best_op = op if op > best_op else best_op + y_grid_overlap[jy, ix] = best_op + + y_grid_overlap = np.expand_dims(y_grid_overlap.reshape((1, -1)), axis=0) + return y_grid_overlap - y_grid_overlap = np.expand_dims(y_grid_overlap.reshape((1,-1)), axis=0) - return y_grid_overlap def integrate_motion_score(bboxes, probs, pred, stride=16): - if len(bboxes) == 0: - return [] - probs = probs.reshape((-1,1)) - pred_anchor_score = np.zeros((probs.shape[0], 1)) - for i in range(len(bboxes)): - x1, y1, x2, y2 = int(bboxes[i][0]/stride), int(bboxes[i][1]/stride), int(bboxes[i][2]/stride), int(bboxes[i][3]/stride) - pred_anchor_score[i,0] = np.sum(pred[y1:y2, x1:x2])/((x2-x1)*(y2-y1)) - # alpha, belta = 2/0.7, 0.1 - # pred_anchor_score = np.where(pred_anchor_score>0.7, pred_anchor_score*alpha, pred_anchor_score) - # pred_anchor_score = np.maximum(pred_anchor_score*alpha, np.ones_like(pred_anchor_score)*belta) - # all_probs = pred_anchor_score - all_probs = probs*pred_anchor_score - return all_probs + if len(bboxes) == 0: + return [] + probs = probs.reshape((-1, 1)) + pred_anchor_score = np.zeros((probs.shape[0], 1)) + for i in range(len(bboxes)): + x1, y1, x2, y2 = int(bboxes[i][0] / stride), int(bboxes[i][1] / stride), int(bboxes[i][2] / stride), int( + bboxes[i][3] / stride) + pred_anchor_score[i, 0] = np.sum(pred[y1:y2, x1:x2]) / ((x2 - x1) * (y2 - y1)) + # alpha, belta = 2/0.7, 0.1 + # pred_anchor_score = np.where(pred_anchor_score>0.7, pred_anchor_score*alpha, pred_anchor_score) + # pred_anchor_score = np.maximum(pred_anchor_score*alpha, np.ones_like(pred_anchor_score)*belta) + # all_probs = pred_anchor_score + all_probs = probs * pred_anchor_score + return all_probs + def box_encoder_pp(anchors, boxes, Y1): - A = np.copy(anchors[:, :, :, :4]) - A = A.reshape((-1, 4)) - - # 1 calculate the iou scores - max_overlaps = np.zeros((anchors.shape[0] * anchors.shape[1] * anchors.shape[2],), dtype=np.float32) - if len(boxes) > 0: - boxes[:, 2] += boxes[:, 0] - boxes[:, 3] += boxes[:, 1] - overlaps = bbox_overlaps(np.ascontiguousarray(A, dtype=np.float64), - np.ascontiguousarray(boxes, dtype=np.float64)) - max_overlaps = overlaps.max(axis=1) - # normalize the iou scores - if np.max(max_overlaps) > 0: - max_overlaps = (max_overlaps - np.min(max_overlaps)) / np.max(max_overlaps) - # 2 calculate the rpn scores - rpn_score = Y1.reshape((-1)).astype(np.float32) - inds = np.where(max_overlaps == 0) - rpn_score[inds] = np.min(rpn_score) - scores = (rpn_score + max_overlaps) / 2 - scores = np.expand_dims(scores.reshape((1,-1)).astype(np.float32), axis=0) - return scores + A = np.copy(anchors[:, :, :, :4]) + A = A.reshape((-1, 4)) + + # 1 calculate the iou scores + max_overlaps = np.zeros((anchors.shape[0] * anchors.shape[1] * anchors.shape[2],), dtype=np.float32) + if len(boxes) > 0: + boxes[:, 2] += boxes[:, 0] + boxes[:, 3] += boxes[:, 1] + overlaps = bbox_overlaps(np.ascontiguousarray(A, dtype=np.float64), + np.ascontiguousarray(boxes, dtype=np.float64)) + max_overlaps = overlaps.max(axis=1) + # normalize the iou scores + if np.max(max_overlaps) > 0: + max_overlaps = (max_overlaps - np.min(max_overlaps)) / np.max(max_overlaps) + # 2 calculate the rpn scores + rpn_score = Y1.reshape((-1)).astype(np.float32) + inds = np.where(max_overlaps == 0) + rpn_score[inds] = np.min(rpn_score) + scores = (rpn_score + max_overlaps) / 2 + scores = np.expand_dims(scores.reshape((1, -1)).astype(np.float32), axis=0) + return scores + def box_encoder_iou(anchors, boxes): - A = np.copy(anchors[:, :, :, :4]) - A = A.reshape((-1, 4)) - - max_overlaps = np.zeros((anchors.shape[0] * anchors.shape[1] * anchors.shape[2],), dtype=np.float32) - if len(boxes) > 0: - boxes[:, 2] += boxes[:, 0] - boxes[:, 3] += boxes[:, 1] - overlaps = bbox_overlaps(np.ascontiguousarray(A, dtype=np.float64), - np.ascontiguousarray(boxes, dtype=np.float64)) - max_overlaps = overlaps.max(axis=1) - scores = np.expand_dims(max_overlaps.reshape((1,-1)).astype(np.float32), axis=0) - return scores + A = np.copy(anchors[:, :, :, :4]) + A = A.reshape((-1, 4)) + + max_overlaps = np.zeros((anchors.shape[0] * anchors.shape[1] * anchors.shape[2],), dtype=np.float32) + if len(boxes) > 0: + boxes[:, 2] += boxes[:, 0] + boxes[:, 3] += boxes[:, 1] + overlaps = bbox_overlaps(np.ascontiguousarray(A, dtype=np.float64), + np.ascontiguousarray(boxes, dtype=np.float64)) + max_overlaps = overlaps.max(axis=1) + scores = np.expand_dims(max_overlaps.reshape((1, -1)).astype(np.float32), axis=0) + return scores diff --git a/keras_csp/utilsfunc.pyc b/keras_csp/utilsfunc.pyc deleted file mode 100644 index 01c074a..0000000 Binary files a/keras_csp/utilsfunc.pyc and /dev/null differ diff --git a/requirements.txt b/requirements.txt index 002965c..06f8421 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -tensorflow-gpu==1.4.1 +tensorflow-gpu==1.14.0 easydict==1.6 joblib==0.10.3 -numpy==1.12.0 -opencv-python==3.4.1.15 -Pillow==4.0.0 +numpy==1.16.2 +opencv-python==4.1.0.25 +Pillow==6.1.0 keras==2.0.6 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..fdafaf2 --- /dev/null +++ b/setup.py @@ -0,0 +1,130 @@ +import os +from Cython.Distutils import build_ext +from distutils.core import setup, Extension +from Cython.Build import cythonize +import numpy as np + +try: + numpy_include = np.get_include() +except AttributeError: + numpy_include = np.get_numpy_include() + + +def customize_compiler_for_nvcc(self): + """inject deep into distutils to customize how the dispatch + to gcc/nvcc works. + If you subclass UnixCCompiler, it's not trivial to get your subclass + injected in, and still have the right customizations (i.e. + distutils.sysconfig.customize_compiler) run on it. So instead of going + the OO route, I have this. Note, it's kindof like a wierd functional + subclassing going on.""" + + # tell the compiler it can processes .cu + self.src_extensions.append('.cu') + + # save references to the default compiler_so and _comple methods + default_compiler_so = self.compiler_so + super = self._compile + + # now redefine the _compile method. This gets executed for each + # object but distutils doesn't have the ability to change compilers + # based on source extension: we add it. + def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): + if os.path.splitext(src)[1] == '.cu': + # use the cuda for .cu files + self.set_executable('compiler_so', CUDA['nvcc']) + # use only a subset of the extra_postargs, which are 1-1 translated + # from the extra_compile_args in the Extension class + postargs = extra_postargs['nvcc'] + else: + postargs = extra_postargs['gcc'] + + super(obj, src, ext, cc_args, postargs, pp_opts) + # reset the default compiler_so, which we might have changed for cuda + self.compiler_so = default_compiler_so + + # inject our redefined _compile method into the class + self._compile = _compile + + +# run the customize_compiler +class custom_build_ext(build_ext): + def build_extensions(self): + customize_compiler_for_nvcc(self.compiler) + build_ext.build_extensions(self) + + +def find_in_path(name, path): + "Find a file in a search path" + # Adapted fom + # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + for dir in path.split(os.pathsep): + binpath = os.path.join(dir, name) + if os.path.exists(binpath): + return os.path.abspath(binpath) + return None + + +def locate_cuda(): + """Locate the CUDA environment on the system + Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' + and values giving the absolute path to each directory. + Starts by looking for the CUDAHOME env variable. If not found, everything + is based on finding 'nvcc' in the PATH. + """ + + # first check if the CUDAHOME env variable is in use + if 'CUDAHOME' in os.environ: + home = os.environ['CUDAHOME'] + nvcc = os.path.join(home, 'bin', 'nvcc') + else: + # otherwise, search the PATH for NVCC + default_path = os.path.join(os.sep, 'usr', 'local', 'cuda', 'bin') + nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) + if nvcc is None: + raise EnvironmentError('The nvcc binary could not be ' + 'located in your $PATH. Either add it to your path, or set $CUDAHOME') + home = os.path.dirname(os.path.dirname(nvcc)) + + cudaconfig = {'home': home, 'nvcc': nvcc, + 'include': os.path.join(home, 'include'), + 'lib64': os.path.join(home, 'lib64')} + for k, v in cudaconfig.items(): + if not os.path.exists(v): + raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + + return cudaconfig + + +CUDA = locate_cuda() + +ext_modules = [ + Extension( + "keras_csp.nms.cpu_nms", + ["keras_csp/nms/cpu_nms.pyx"], + extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, + include_dirs=[numpy_include] + ), + Extension('keras_csp.nms.gpu_nms', + ['keras_csp/nms/nms_kernel.cu', 'keras_csp/nms/gpu_nms.pyx'], + library_dirs=[CUDA['lib64']], + libraries=['cudart'], + language='c++', + runtime_library_dirs=[CUDA['lib64']], + # this syntax is specific to this build system + # we're only going to use certain compiler args with nvcc and not with + # gcc the implementation of this trick is in customize_compiler() below + extra_compile_args={'gcc': ["-Wno-unused-function"], + 'nvcc': ['-arch=sm_35', + '--ptxas-options=-v', + '-c', + '--compiler-options', + "'-fPIC'"]}, + include_dirs=[numpy_include, CUDA['include']] + ) +] + +setup( + ext_modules=ext_modules, + cmdclass={'build_ext': custom_build_ext}, +) diff --git a/test_caltech.py b/test_caltech.py index 8ece107..7cf8926 100644 --- a/test_caltech.py +++ b/test_caltech.py @@ -1,7 +1,6 @@ -from __future__ import division import os import time -import cPickle +import pickle from keras.layers import Input from keras.models import Model from keras_csp import config, bbox_process @@ -13,9 +12,9 @@ C.offset = True cache_path = 'data/cache/caltech/test' with open(cache_path, 'rb') as fid: - val_data = cPickle.load(fid) + val_data = pickle.load(fid, encoding='latin1') num_imgs = len(val_data) -print 'num of val samples: {}'.format(num_imgs) +print('num of val samples: {}'.format(num_imgs)) C.size_test = (480, 640) input_shape_img = (C.size_test[0], C.size_test[1], 3) @@ -34,53 +33,53 @@ out_path = 'output/valresults/caltech/%s/nooff' % (C.scale) if not os.path.exists(out_path): - os.makedirs(out_path) + os.makedirs(out_path) files = sorted(os.listdir(w_path)) for w_ind in range(51, 121): - for f in files: - if f.split('_')[0] == 'net' and int(f.split('_')[1][1:]) == w_ind: - cur_file = f - break - weight1 = os.path.join(w_path, cur_file) - print 'load weights from {}'.format(weight1) - model.load_weights(weight1, by_name=True) - res_path = os.path.join(out_path, '%03d'%int(str(w_ind))) + for f in files: + if f.split('_')[0] == 'net' and int(f.split('_')[1][1:]) == w_ind: + cur_file = f + break + weight1 = os.path.join(w_path, cur_file) + print('load weights from {}'.format(weight1)) + model.load_weights(weight1, by_name=True) + res_path = os.path.join(out_path, '%03d' % int(str(w_ind))) - print res_path - if not os.path.exists(res_path): - os.mkdir(res_path) - for st in range(6, 11): - set_path = os.path.join(res_path, 'set' + '%02d' % st) - if not os.path.exists(set_path): - os.mkdir(set_path) + print(res_path) + if not os.path.exists(res_path): + os.mkdir(res_path) + for st in range(6, 11): + set_path = os.path.join(res_path, 'set' + '%02d' % st) + if not os.path.exists(set_path): + os.mkdir(set_path) - start_time = time.time() - for f in range(num_imgs): - filepath = val_data[f]['filepath'] - filepath_next = val_data[f + 1]['filepath'] if f < num_imgs - 1 else val_data[f]['filepath'] - set = filepath.split('/')[-1].split('_')[0] - video = filepath.split('/')[-1].split('_')[1] - frame_number = int(filepath.split('/')[-1].split('_')[2][1:6]) + 1 - frame_number_next = int(filepath_next.split('/')[-1].split('_')[2][1:6]) + 1 - set_path = os.path.join(res_path, set) - video_path = os.path.join(set_path, video + '.txt') - if os.path.exists(video_path): - continue - if frame_number == 30: - res_all = [] - img = cv2.imread(filepath) - x_rcnn = format_img(img, C) - Y = model.predict(x_rcnn) + start_time = time.time() + for f in range(num_imgs): + filepath = val_data[f]['filepath'] + filepath_next = val_data[f + 1]['filepath'] if f < num_imgs - 1 else val_data[f]['filepath'] + set = filepath.split('/')[-1].split('_')[0] + video = filepath.split('/')[-1].split('_')[1] + frame_number = int(filepath.split('/')[-1].split('_')[2][1:6]) + 1 + frame_number_next = int(filepath_next.split('/')[-1].split('_')[2][1:6]) + 1 + set_path = os.path.join(res_path, set) + video_path = os.path.join(set_path, video + '.txt') + if os.path.exists(video_path): + continue + if frame_number == 30: + res_all = [] + img = cv2.imread(filepath) + x_rcnn = format_img(img, C) + Y = model.predict(x_rcnn) - if C.offset: - boxes = bbox_process.parse_det_offset(Y, C, score=0.01,down=4) - else: - boxes = bbox_process.parse_det(Y, C, score=0.01, down=4, scale=C.scale) + if C.offset: + boxes = bbox_process.parse_det_offset(Y, C, score=0.01, down=4) + else: + boxes = bbox_process.parse_det(Y, C, score=0.01, down=4, scale=C.scale) - if len(boxes)>0: - f_res = np.repeat(frame_number, len(boxes), axis=0).reshape((-1, 1)) - boxes[:, [2, 3]] -= boxes[:, [0, 1]] - res_all += np.concatenate((f_res, boxes), axis=-1).tolist() - if frame_number_next == 30 or f == num_imgs - 1: - np.savetxt(video_path, np.array(res_all), fmt='%6f') - print time.time() - start_time + if len(boxes) > 0: + f_res = np.repeat(frame_number, len(boxes), axis=0).reshape((-1, 1)) + boxes[:, [2, 3]] -= boxes[:, [0, 1]] + res_all += np.concatenate((f_res, boxes), axis=-1).tolist() + if frame_number_next == 30 or f == num_imgs - 1: + np.savetxt(video_path, np.array(res_all), fmt='%6f') + print(time.time() - start_time) diff --git a/test_city.py b/test_city.py index 644659d..4f787a9 100644 --- a/test_city.py +++ b/test_city.py @@ -1,7 +1,6 @@ -from __future__ import division import os import time -import cPickle +import pickle from keras.layers import Input from keras.models import Model from keras_csp import config, bbox_process @@ -12,18 +11,18 @@ C.offset = True cache_path = 'data/cache/cityperson/val_500' with open(cache_path, 'rb') as fid: - val_data = cPickle.load(fid) + val_data = pickle.load(fid, encoding='latin1') num_imgs = len(val_data) -print 'num of val samples: {}'.format(num_imgs) +print('num of val samples: {}'.format(num_imgs)) C.size_test = (1024, 2048) input_shape_img = (C.size_test[0], C.size_test[1], 3) img_input = Input(shape=input_shape_img) # define the base network (resnet here, can be MobileNet, etc) -if C.network=='resnet50': +if C.network == 'resnet50': from keras_csp import resnet50 as nn -elif C.network=='mobilenet': +elif C.network == 'mobilenet': from keras_csp import mobilenet as nn else: raise NotImplementedError('Not support network: {}'.format(C.network)) @@ -39,37 +38,39 @@ w_path = 'output/valmodels/city/%s/nooff' % (C.scale) out_path = 'output/valresults/city/%s/nooff' % (C.scale) if not os.path.exists(out_path): - os.makedirs(out_path) + os.makedirs(out_path) files = sorted(os.listdir(w_path)) # get the results from epoch 51 to epoch 150 -for w_ind in range(51,151): - for f in files: - if f.split('_')[0] == 'net' and int(f.split('_')[1][1:]) == w_ind: - cur_file = f - break - weight1 = os.path.join(w_path, cur_file) - print 'load weights from {}'.format(weight1) - model.load_weights(weight1, by_name=True) - res_path = os.path.join(out_path, '%03d'%int(str(w_ind))) - if not os.path.exists(res_path): - os.makedirs(res_path) - print res_path - res_file = os.path.join(res_path, 'val_det.txt') - res_all = [] - start_time = time.time() - for f in range(num_imgs): - filepath = val_data[f]['filepath'] - img = cv2.imread(filepath) - x_rcnn = format_img(img, C) - Y = model.predict(x_rcnn) +for w_ind in range(150, 151): + for f in files: + if f.split('_')[0] == 'net' and int(f.split('_')[1][1:]) == w_ind: + cur_file = f + break + weight1 = os.path.join(w_path, cur_file) + print('load weights from {}'.format(weight1)) + model.load_weights(weight1, by_name=True) + res_path = os.path.join(out_path, '%03d' % int(str(w_ind))) + if not os.path.exists(res_path): + os.makedirs(res_path) + print(res_path) + res_file = os.path.join(res_path, 'val_det.txt') + res_all = [] + start_time = time.time() + for f in range(num_imgs): + filepath = val_data[f]['filepath'] + img = cv2.imread(filepath) + if img is None: + raise RuntimeError("image at %s not found" % filepath) + x_rcnn = format_img(img, C) + Y = model.predict(x_rcnn) - if C.offset: - boxes = bbox_process.parse_det_offset(Y, C, score=0.1,down=4) - else: - boxes = bbox_process.parse_det(Y, C, score=0.1, down=4, scale=C.scale) - if len(boxes)>0: - f_res = np.repeat(f+1, len(boxes), axis=0).reshape((-1, 1)) - boxes[:, [2, 3]] -= boxes[:, [0, 1]] - res_all += np.concatenate((f_res, boxes), axis=-1).tolist() - np.savetxt(res_file, np.array(res_all), fmt='%6f') - print time.time() - start_time + if C.offset: + boxes = bbox_process.parse_det_offset(Y, C, score=0.1, down=4) + else: + boxes = bbox_process.parse_det(Y, C, score=0.1, down=4, scale=C.scale) + if len(boxes) > 0: + f_res = np.repeat(f + 1, len(boxes), axis=0).reshape((-1, 1)) + boxes[:, [2, 3]] -= boxes[:, [0, 1]] + res_all += np.concatenate((f_res, boxes), axis=-1).tolist() + np.savetxt(res_file, np.array(res_all), fmt='%6f') + print(time.time() - start_time) diff --git a/test_wider_ms.py b/test_wider_ms.py index 4e02316..b8b2169 100644 --- a/test_wider_ms.py +++ b/test_wider_ms.py @@ -1,7 +1,6 @@ -from __future__ import division import os import time -import cPickle +import pickle from keras.layers import Input from keras.models import Model from keras_csp import config, bbox_process @@ -14,9 +13,9 @@ C.num_scale = 2 cache_path = 'data/cache/widerface/val' with open(cache_path, 'rb') as fid: - val_data = cPickle.load(fid) + val_data = pickle.load(fid, encoding='latin1') num_imgs = len(val_data) -print 'num of val samples: {}'.format(num_imgs) +print('num of val samples: {}'.format(num_imgs)) C.size_test = [0, 0] input_shape_img = (None, None, 3) @@ -24,6 +23,7 @@ # define the base network (resnet here, can be MobileNet, etc) from keras_csp import resnet50 as nn + # define the network prediction preds = nn.nn_p3p4p5(img_input, offset=C.offset, num_scale=C.num_scale, trainable=True) model = Model(img_input, preds) @@ -35,129 +35,139 @@ w_path = 'output/valmodels/wider/%s/nooff' % (C.scale) out_path = 'output/valresults/wider/%s/nooff' % (C.scale) if not os.path.exists(out_path): - os.makedirs(out_path) + os.makedirs(out_path) files = sorted(os.listdir(w_path)) # get the results from epoch 51 to epoch 150 -for w_ind in range(382,383): - for f in files: - if f.split('_')[0] == 'net' and int(f.split('_')[1][1:]) == w_ind: - cur_file = f - break - weight1 = os.path.join(w_path, cur_file) - print 'load weights from {}'.format(weight1) - model.load_weights(weight1, by_name=True) - res_path = os.path.join(out_path, '%03d'%int(str(w_ind))) - if not os.path.exists(res_path): - os.makedirs(res_path) - print res_path - - start_time = time.time() - for f in range(num_imgs): - filepath = val_data[f]['filepath'] - event = filepath.split('/')[-2] - event_path = os.path.join(res_path, event) - if not os.path.exists(event_path): - os.mkdir(event_path) - filename = filepath.split('/')[-1].split('.')[0] - txtpath = os.path.join(event_path, filename + '.txt') - if os.path.exists(txtpath): - continue - - img = cv2.imread(filepath) - - def detect_face(img, scale=1, flip=False): - img_h, img_w = img.shape[:2] - img_h_new, img_w_new = int(np.ceil(scale * img_h / 16) * 16), int(np.ceil(scale * img_w / 16) * 16) - scale_h, scale_w = img_h_new / img_h, img_w_new / img_w - - img_s = cv2.resize(img, None, None, fx=scale_w, fy=scale_h, interpolation=cv2.INTER_LINEAR) - # img_h, img_w = img_s.shape[:2] - # print frame_number - C.size_test[0] = img_h_new - C.size_test[1] = img_w_new - - if flip: - img_sf = cv2.flip(img_s, 1) - # x_rcnn = format_img_pad(img_sf, C) - x_rcnn = format_img(img_sf, C) - else: - # x_rcnn = format_img_pad(img_s, C) - x_rcnn = format_img(img_s, C) - Y = model.predict(x_rcnn) - boxes = bbox_process.parse_wider_offset(Y, C, score=0.05, nmsthre=0.6) - if len(boxes) > 0: - keep_index = np.where(np.minimum(boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]) >= 12)[0] - boxes = boxes[keep_index, :] - if len(boxes) > 0: - if flip: - boxes[:, [0, 2]] = img_s.shape[1] - boxes[:, [2, 0]] - boxes[:, 0:4:2] = boxes[:, 0:4:2] / scale_w - boxes[:, 1:4:2] = boxes[:, 1:4:2] / scale_h - else: - boxes = np.empty(shape=[0, 5], dtype=np.float32) - return boxes - - def im_det_ms_pyramid(image, max_im_shrink): - # shrink detecting and shrink only detect big face - det_s = np.row_stack((detect_face(image, 0.5), detect_face(image, 0.5, flip=True))) - index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 64)[0] - det_s = det_s[index, :] - - det_temp = np.row_stack((detect_face(image, 0.75), detect_face(image, 0.75, flip=True))) - index = np.where(np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) > 32)[0] - det_temp = det_temp[index, :] - det_s = np.row_stack((det_s, det_temp)) - - det_temp = np.row_stack((detect_face(image, 0.25), detect_face(image, 0.25, flip=True))) - index = np.where(np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) > 96)[0] - det_temp = det_temp[index, :] - det_s = np.row_stack((det_s, det_temp)) - - st = [1.25, 1.5, 1.75, 2.0, 2.25] - for i in range(len(st)): - if (st[i] <= max_im_shrink): - det_temp = np.row_stack((detect_face(image, st[i]), detect_face(image, st[i], flip=True))) - # Enlarged images are only used to detect small faces. - if st[i] == 1.25: - index = np.where( - np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 128)[0] - det_temp = det_temp[index, :] - elif st[i] == 1.5: - index = np.where( - np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 96)[0] - det_temp = det_temp[index, :] - elif st[i] == 1.75: - index = np.where( - np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 64)[0] - det_temp = det_temp[index, :] - elif st[i] == 2.0: - index = np.where( - np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 48)[0] - det_temp = det_temp[index, :] - elif st[i] == 2.25: - index = np.where( - np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 32)[0] - det_temp = det_temp[index, :] - det_s = np.row_stack((det_s, det_temp)) - return det_s - - max_im_shrink = (0x7fffffff / 577.0 / (img.shape[0] * img.shape[1])) ** 0.5 # the max size of input image - shrink = max_im_shrink if max_im_shrink < 1 else 1 - det0 = detect_face(img) - det1 = detect_face(img, flip=True) - det2 = im_det_ms_pyramid(img, max_im_shrink) - # merge all test results via bounding box voting - det = np.row_stack((det0, det1, det2)) - keep_index = np.where(np.minimum(det[:, 2] - det[:, 0], det[:, 3] - det[:, 1]) >= 3)[0] - det = det[keep_index, :] - dets = bbox_process.soft_bbox_vote(det, thre=0.4) - keep_index = np.where((dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1) >= 6 ** 2)[0] - dets = dets[keep_index, :] - - with open(txtpath, 'w') as f: - f.write('{:s}\n'.format(filename)) - f.write('{:d}\n'.format(len(dets))) - for line in dets: - f.write('{:.0f} {:.0f} {:.0f} {:.0f} {:.3f}\n'. - format(line[0], line[1], line[2] - line[0] + 1, line[3] - line[1] + 1, line[4])) - print time.time() - start_time \ No newline at end of file +for w_ind in range(382, 383): + for f in files: + if f.split('_')[0] == 'net' and int(f.split('_')[1][1:]) == w_ind: + cur_file = f + break + weight1 = os.path.join(w_path, cur_file) + print('load weights from {}'.format(weight1)) + model.load_weights(weight1, by_name=True) + res_path = os.path.join(out_path, '%03d' % int(str(w_ind))) + if not os.path.exists(res_path): + os.makedirs(res_path) + print(res_path) + + start_time = time.time() + for f in range(num_imgs): + filepath = val_data[f]['filepath'] + event = filepath.split('/')[-2] + event_path = os.path.join(res_path, event) + if not os.path.exists(event_path): + os.mkdir(event_path) + filename = filepath.split('/')[-1].split('.')[0] + txtpath = os.path.join(event_path, filename + '.txt') + if os.path.exists(txtpath): + continue + + img = cv2.imread(filepath) + + + def detect_face(img, scale=1, flip=False): + img_h, img_w = img.shape[:2] + img_h_new, img_w_new = int(np.ceil(scale * img_h / 16) * 16), int(np.ceil(scale * img_w / 16) * 16) + scale_h, scale_w = img_h_new / img_h, img_w_new / img_w + + img_s = cv2.resize(img, None, None, fx=scale_w, fy=scale_h, interpolation=cv2.INTER_LINEAR) + # img_h, img_w = img_s.shape[:2] + # print frame_number + C.size_test[0] = img_h_new + C.size_test[1] = img_w_new + + if flip: + img_sf = cv2.flip(img_s, 1) + # x_rcnn = format_img_pad(img_sf, C) + x_rcnn = format_img(img_sf, C) + else: + # x_rcnn = format_img_pad(img_s, C) + x_rcnn = format_img(img_s, C) + Y = model.predict(x_rcnn) + boxes = bbox_process.parse_wider_offset(Y, C, score=0.05, nmsthre=0.6) + if len(boxes) > 0: + keep_index = np.where(np.minimum(boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]) >= 12)[0] + boxes = boxes[keep_index, :] + if len(boxes) > 0: + if flip: + boxes[:, [0, 2]] = img_s.shape[1] - boxes[:, [2, 0]] + boxes[:, 0:4:2] = boxes[:, 0:4:2] / scale_w + boxes[:, 1:4:2] = boxes[:, 1:4:2] / scale_h + else: + boxes = np.empty(shape=[0, 5], dtype=np.float32) + return boxes + + + def im_det_ms_pyramid(image, max_im_shrink): + # shrink detecting and shrink only detect big face + det_s = np.row_stack((detect_face(image, 0.5), detect_face(image, 0.5, flip=True))) + index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 64)[0] + det_s = det_s[index, :] + + det_temp = np.row_stack((detect_face(image, 0.75), detect_face(image, 0.75, flip=True))) + index = np.where(np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) > 32)[ + 0] + det_temp = det_temp[index, :] + det_s = np.row_stack((det_s, det_temp)) + + det_temp = np.row_stack((detect_face(image, 0.25), detect_face(image, 0.25, flip=True))) + index = np.where(np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) > 96)[ + 0] + det_temp = det_temp[index, :] + det_s = np.row_stack((det_s, det_temp)) + + st = [1.25, 1.5, 1.75, 2.0, 2.25] + for i in range(len(st)): + if (st[i] <= max_im_shrink): + det_temp = np.row_stack((detect_face(image, st[i]), detect_face(image, st[i], flip=True))) + # Enlarged images are only used to detect small faces. + if st[i] == 1.25: + index = np.where( + np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 128)[ + 0] + det_temp = det_temp[index, :] + elif st[i] == 1.5: + index = np.where( + np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 96)[ + 0] + det_temp = det_temp[index, :] + elif st[i] == 1.75: + index = np.where( + np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 64)[ + 0] + det_temp = det_temp[index, :] + elif st[i] == 2.0: + index = np.where( + np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 48)[ + 0] + det_temp = det_temp[index, :] + elif st[i] == 2.25: + index = np.where( + np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, det_temp[:, 3] - det_temp[:, 1] + 1) < 32)[ + 0] + det_temp = det_temp[index, :] + det_s = np.row_stack((det_s, det_temp)) + return det_s + + + max_im_shrink = (0x7fffffff / 577.0 / (img.shape[0] * img.shape[1])) ** 0.5 # the max size of input image + shrink = max_im_shrink if max_im_shrink < 1 else 1 + det0 = detect_face(img) + det1 = detect_face(img, flip=True) + det2 = im_det_ms_pyramid(img, max_im_shrink) + # merge all test results via bounding box voting + det = np.row_stack((det0, det1, det2)) + keep_index = np.where(np.minimum(det[:, 2] - det[:, 0], det[:, 3] - det[:, 1]) >= 3)[0] + det = det[keep_index, :] + dets = bbox_process.soft_bbox_vote(det, thre=0.4) + keep_index = np.where((dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1) >= 6 ** 2)[0] + dets = dets[keep_index, :] + + with open(txtpath, 'w') as f: + f.write('{:s}\n'.format(filename)) + f.write('{:d}\n'.format(len(dets))) + for line in dets: + f.write('{:.0f} {:.0f} {:.0f} {:.0f} {:.3f}\n'. + format(line[0], line[1], line[2] - line[0] + 1, line[3] - line[1] + 1, line[4])) + print(time.time() - start_time) diff --git a/train_caltech.py b/train_caltech.py index 7359d8a..ec6b0f1 100644 --- a/train_caltech.py +++ b/train_caltech.py @@ -1,9 +1,8 @@ -from __future__ import division import random import sys, os import time import numpy as np -import cPickle +import pickle from keras.utils import generic_utils from keras.optimizers import Adam from keras.layers import Input @@ -28,20 +27,22 @@ cache_ped = 'data/cache/caltech/train_gt' cache_emp = 'data/cache/caltech/train_nogt' with open(cache_ped, 'rb') as fid: - ped_data = cPickle.load(fid) + ped_data = pickle.load(fid, encoding='latin1') with open(cache_emp, 'rb') as fid: - emp_data = cPickle.load(fid) + emp_data = pickle.load(fid, encoding='latin1') num_imgs_ped = len(ped_data) num_imgs_emp = len(emp_data) -print ('num of ped and emp samples: {} {}'.format(num_imgs_ped,num_imgs_emp)) +print(('num of ped and emp samples: {} {}'.format(num_imgs_ped, num_imgs_emp))) data_gen_train = data_generators.get_data_hybrid(ped_data, emp_data, C, batchsize=batchsize, hyratio=0.5) # define the base network (resnet here, can be MobileNet, etc) -if C.network=='resnet50': +if C.network == 'resnet50': from keras_csp import resnet50 as nn + weight_path = 'data/models/resnet50_weights_tf_dim_ordering_tf_kernels.h5' -elif C.network=='mobilenet': +elif C.network == 'mobilenet': from keras_csp import mobilenet as nn + weight_path = 'data/models/mobilenet_1_0_224_tf_no_top.h5' else: raise NotImplementedError('Not support network: {}'.format(C.network)) @@ -53,48 +54,47 @@ preds_tea = nn.nn_p3p4p5(img_input, offset=C.offset, num_scale=C.num_scale, trainable=True) model = Model(img_input, preds) -if num_gpu>1: +if num_gpu > 1: from keras_csp.parallel_model import ParallelModel + model = ParallelModel(model, int(num_gpu)) model_stu = Model(img_input, preds) model_tea = Model(img_input, preds_tea) model.load_weights(weight_path, by_name=True) model_tea.load_weights(weight_path, by_name=True) -print 'load weights from {}'.format(weight_path) +print('load weights from {}'.format(weight_path)) if C.offset: out_path = 'output/valmodels/caltech/%s/off2' % (C.scale) else: out_path = 'output/valmodels/caltech/%s/nooff' % (C.scale) - if not os.path.exists(out_path): os.makedirs(out_path) -res_file = os.path.join(out_path,'records.txt') +res_file = os.path.join(out_path, 'records.txt') optimizer = Adam(lr=C.init_lr) if C.offset: model.compile(optimizer=optimizer, loss=[losses.cls_center, losses.regr_h, losses.regr_offset]) else: - if C.scale=='hw': + if C.scale == 'hw': model.compile(optimizer=optimizer, loss=[losses.cls_center, losses.regr_hw]) else: model.compile(optimizer=optimizer, loss=[losses.cls_center, losses.regr_h]) - -epoch_length = int(C.iter_per_epoch/batchsize) +epoch_length = int(C.iter_per_epoch / batchsize) iter_num = 0 add_epoch = 0 losses = np.zeros((epoch_length, 3)) best_loss = np.Inf -print('Starting training with lr {} and alpha {}'.format(C.init_lr, C.alpha)) +print(('Starting training with lr {} and alpha {}'.format(C.init_lr, C.alpha))) start_time = time.time() total_loss_r, cls_loss_r1, regr_loss_r1, offset_loss_r1 = [], [], [], [] for epoch_num in range(C.num_epochs): progbar = generic_utils.Progbar(epoch_length) - print('Epoch {}/{}'.format(epoch_num + 1 + add_epoch, C.num_epochs + C.add_epoch)) + print(('Epoch {}/{}'.format(epoch_num + 1 + add_epoch, C.num_epochs + C.add_epoch))) while True: try: X, Y = next(data_gen_train) @@ -102,12 +102,13 @@ for l in model_tea.layers: weights_tea = l.get_weights() - if len(weights_tea)>0: + if len(weights_tea) > 0: if num_gpu > 1: weights_stu = model_stu.get_layer(name=l.name).get_weights() else: weights_stu = model.get_layer(name=l.name).get_weights() - weights_tea = [C.alpha*w_tea + (1-C.alpha)*w_stu for (w_tea, w_stu) in zip(weights_tea, weights_stu)] + weights_tea = [C.alpha * w_tea + (1 - C.alpha) * w_stu for (w_tea, w_stu) in + zip(weights_tea, weights_stu)] l.set_weights(weights_tea) # print loss_s1 losses[iter_num, 0] = loss_s1[1] @@ -120,30 +121,32 @@ iter_num += 1 if iter_num % 20 == 0: progbar.update(iter_num, - [('cls', np.mean(losses[:iter_num, 0])), ('regr_h', np.mean(losses[:iter_num, 1])), ('offset', np.mean(losses[:iter_num, 2]))]) + [('cls', np.mean(losses[:iter_num, 0])), ('regr_h', np.mean(losses[:iter_num, 1])), + ('offset', np.mean(losses[:iter_num, 2]))]) if iter_num == epoch_length: cls_loss1 = np.mean(losses[:, 0]) regr_loss1 = np.mean(losses[:, 1]) offset_loss1 = np.mean(losses[:, 2]) - total_loss = cls_loss1+regr_loss1+offset_loss1 + total_loss = cls_loss1 + regr_loss1 + offset_loss1 total_loss_r.append(total_loss) cls_loss_r1.append(cls_loss1) regr_loss_r1.append(regr_loss1) offset_loss_r1.append(offset_loss1) - print('Total loss: {}'.format(total_loss)) - print('Elapsed time: {}'.format(time.time() - start_time)) + print(('Total loss: {}'.format(total_loss))) + print(('Elapsed time: {}'.format(time.time() - start_time))) iter_num = 0 start_time = time.time() if total_loss < best_loss: - print('Total loss decreased from {} to {}, saving weights'.format(best_loss, total_loss)) + print(('Total loss decreased from {} to {}, saving weights'.format(best_loss, total_loss))) best_loss = total_loss - model_tea.save_weights(os.path.join(out_path, 'net_e{}_l{}.hdf5'.format(epoch_num + 1 + add_epoch, total_loss))) + model_tea.save_weights( + os.path.join(out_path, 'net_e{}_l{}.hdf5'.format(epoch_num + 1 + add_epoch, total_loss))) break except Exception as e: - print ('Exception: {}'.format(e)) + print(('Exception: {}'.format(e))) continue records = np.concatenate((np.asarray(total_loss_r).reshape((-1, 1)), np.asarray(cls_loss_r1).reshape((-1, 1)), @@ -151,4 +154,4 @@ np.asarray(offset_loss_r1).reshape((-1, 1)),), axis=-1) np.savetxt(res_file, np.array(records), fmt='%.6f') -print('Training complete, exiting.') \ No newline at end of file +print('Training complete, exiting.') diff --git a/train_city.py b/train_city.py index 8eb64ac..cd6c58f 100644 --- a/train_city.py +++ b/train_city.py @@ -1,9 +1,9 @@ -from __future__ import division +import glob import random import sys, os import time import numpy as np -import cPickle +import pickle from keras.utils import generic_utils from keras.optimizers import Adam from keras.layers import Input @@ -15,7 +15,7 @@ C = config.Config() C.gpu_ids = '0,1,2,3' C.onegpu = 2 -C.size_train = (640,1280) +C.size_train = (640, 1280) C.init_lr = 2e-4 C.num_epochs = 150 C.offset = True @@ -27,118 +27,128 @@ # get the training data cache_path = 'data/cache/cityperson/train_h50' with open(cache_path, 'rb') as fid: - train_data = cPickle.load(fid) + train_data = pickle.load(fid, encoding='latin1') num_imgs_train = len(train_data) random.shuffle(train_data) -print 'num of training samples: {}'.format(num_imgs_train) +print('num of training samples: {}'.format(num_imgs_train)) data_gen_train = data_generators.get_data(train_data, C, batchsize=batchsize) # define the base network (resnet here, can be MobileNet, etc) -if C.network=='resnet50': +if C.network == 'resnet50': from keras_csp import resnet50 as nn + weight_path = 'data/models/resnet50_weights_tf_dim_ordering_tf_kernels.h5' +if C.offset: + out_path = 'output/valmodels/city/%s/off' % (C.scale) +else: + out_path = 'output/valmodels/city/%s/nooff' % (C.scale) +if not os.path.exists(out_path): + os.makedirs(out_path) + epoch = 0 +else: + checkpoint_paths = glob.glob(out_path + "/net*.hdf5") + checkpoint_names = [f.split("/")[-1] for f in checkpoint_paths] + epochs = [*map(int, [f.split("net_e")[1].split("_")[0] for f in checkpoint_names if "net_e" in f])] + max_epoch_idx = np.argmax(epochs) + epoch = epochs[max_epoch_idx] + weight_path = checkpoint_paths[max_epoch_idx] + input_shape_img = (C.size_train[0], C.size_train[1], 3) img_input = Input(shape=input_shape_img) # define the network prediction preds = nn.nn_p3p4p5(img_input, offset=C.offset, num_scale=C.num_scale, trainable=True) preds_tea = nn.nn_p3p4p5(img_input, offset=C.offset, num_scale=C.num_scale, trainable=True) - model = Model(img_input, preds) -if num_gpu>1: + +if num_gpu > 1: from keras_csp.parallel_model import ParallelModel + model = ParallelModel(model, int(num_gpu)) model_stu = Model(img_input, preds) model_tea = Model(img_input, preds_tea) model.load_weights(weight_path, by_name=True) model_tea.load_weights(weight_path, by_name=True) -print 'load weights from {}'.format(weight_path) +print('load weights from {}'.format(weight_path)) -if C.offset: - out_path = 'output/valmodels/city/%s/off' % (C.scale) -else: - out_path = 'output/valmodels/city/%s/nooff' % (C.scale) -if not os.path.exists(out_path): - os.makedirs(out_path) -res_file = os.path.join(out_path,'records.txt') +res_file = os.path.join(out_path, 'records.txt') optimizer = Adam(lr=C.init_lr) if C.offset: model.compile(optimizer=optimizer, loss=[losses.cls_center, losses.regr_h, losses.regr_offset]) else: - if C.scale=='hw': + if C.scale == 'hw': model.compile(optimizer=optimizer, loss=[losses.cls_center, losses.regr_hw]) else: model.compile(optimizer=optimizer, loss=[losses.cls_center, losses.regr_h]) - -epoch_length = int(C.iter_per_epoch/batchsize) +epoch_length = int(C.iter_per_epoch / batchsize) iter_num = 0 add_epoch = 0 losses = np.zeros((epoch_length, 3)) best_loss = np.Inf -print('Starting training with lr {} and alpha {}'.format(C.init_lr, C.alpha)) +print(('Starting training with lr {} and alpha {}'.format(C.init_lr, C.alpha))) start_time = time.time() total_loss_r, cls_loss_r1, regr_loss_r1, offset_loss_r1 = [], [], [], [] -for epoch_num in range(C.num_epochs): +for epoch_num in range(epoch, C.num_epochs): progbar = generic_utils.Progbar(epoch_length) - print('Epoch {}/{}'.format(epoch_num + 1 + add_epoch, C.num_epochs + C.add_epoch)) + print(('Epoch {}/{}'.format(epoch_num + 1 + add_epoch, C.num_epochs + C.add_epoch))) while True: - try: - X, Y = next(data_gen_train) - loss_s1 = model.train_on_batch(X, Y) - - for l in model_tea.layers: - weights_tea = l.get_weights() - if len(weights_tea)>0: - if num_gpu > 1: - weights_stu = model_stu.get_layer(name=l.name).get_weights() - else: - weights_stu = model.get_layer(name=l.name).get_weights() - weights_tea = [C.alpha*w_tea + (1-C.alpha)*w_stu for (w_tea, w_stu) in zip(weights_tea, weights_stu)] - l.set_weights(weights_tea) - # print loss_s1 - losses[iter_num, 0] = loss_s1[1] - losses[iter_num, 1] = loss_s1[2] - if C.offset: - losses[iter_num, 2] = loss_s1[3] - else: - losses[iter_num, 2] = 0 - - iter_num += 1 - if iter_num % 20 == 0: - progbar.update(iter_num, - [('cls', np.mean(losses[:iter_num, 0])), ('regr_h', np.mean(losses[:iter_num, 1])), ('offset', np.mean(losses[:iter_num, 2]))]) - if iter_num == epoch_length: - cls_loss1 = np.mean(losses[:, 0]) - regr_loss1 = np.mean(losses[:, 1]) - offset_loss1 = np.mean(losses[:, 2]) - total_loss = cls_loss1+regr_loss1+offset_loss1 - - total_loss_r.append(total_loss) - cls_loss_r1.append(cls_loss1) - regr_loss_r1.append(regr_loss1) - offset_loss_r1.append(offset_loss1) - print('Total loss: {}'.format(total_loss)) - print('Elapsed time: {}'.format(time.time() - start_time)) - - iter_num = 0 - start_time = time.time() - - if total_loss < best_loss: - print('Total loss decreased from {} to {}, saving weights'.format(best_loss, total_loss)) - best_loss = total_loss - model_tea.save_weights(os.path.join(out_path, 'net_e{}_l{}.hdf5'.format(epoch_num + 1 + add_epoch, total_loss))) - break - except Exception as e: - print ('Exception: {}'.format(e)) - continue + X, Y = next(data_gen_train) + loss_s1 = model.train_on_batch(X, Y) + + for l in model_tea.layers: + weights_tea = l.get_weights() + if len(weights_tea) > 0: + if num_gpu > 1: + weights_stu = model_stu.get_layer(name=l.name).get_weights() + else: + weights_stu = model.get_layer(name=l.name).get_weights() + weights_tea = [C.alpha * w_tea + (1 - C.alpha) * w_stu for (w_tea, w_stu) in + zip(weights_tea, weights_stu)] + l.set_weights(weights_tea) + # print loss_s1 + losses[iter_num, 0] = loss_s1[1] + losses[iter_num, 1] = loss_s1[2] + if C.offset: + losses[iter_num, 2] = loss_s1[3] + else: + losses[iter_num, 2] = 0 + + iter_num += 1 + if iter_num % 20 == 0: + progbar.update(iter_num, + [('cls', np.mean(losses[:iter_num, 0])), ('regr_h', np.mean(losses[:iter_num, 1])), + ('offset', np.mean(losses[:iter_num, 2]))]) + if iter_num == epoch_length: + cls_loss1 = np.mean(losses[:, 0]) + regr_loss1 = np.mean(losses[:, 1]) + offset_loss1 = np.mean(losses[:, 2]) + total_loss = cls_loss1 + regr_loss1 + offset_loss1 + + total_loss_r.append(total_loss) + cls_loss_r1.append(cls_loss1) + regr_loss_r1.append(regr_loss1) + offset_loss_r1.append(offset_loss1) + print(('Total loss: {}'.format(total_loss))) + print(('Elapsed time: {}'.format(time.time() - start_time))) + + iter_num = 0 + start_time = time.time() + + if total_loss < best_loss: + print(('Total loss decreased from {} to {}, saving weights'.format(best_loss, total_loss))) + best_loss = total_loss + model_tea.save_weights( + os.path.join(out_path, 'net_e{}_l{}.hdf5'.format(epoch_num + 1 + add_epoch, total_loss))) + break + records = np.concatenate((np.asarray(total_loss_r).reshape((-1, 1)), np.asarray(cls_loss_r1).reshape((-1, 1)), np.asarray(regr_loss_r1).reshape((-1, 1)), np.asarray(offset_loss_r1).reshape((-1, 1)),), axis=-1) np.savetxt(res_file, np.array(records), fmt='%.6f') -print('Training complete, exiting.') \ No newline at end of file +print('Training complete, exiting.') diff --git a/train_wider.py b/train_wider.py index 404b2fe..34ffd6f 100644 --- a/train_wider.py +++ b/train_wider.py @@ -1,9 +1,8 @@ -from __future__ import division import random import sys, os import time import numpy as np -import cPickle +import pickle from keras.utils import generic_utils from keras.optimizers import Adam from keras.layers import Input @@ -15,7 +14,7 @@ C = config.Config() C.gpu_ids = '0,1,2,3,4,5,6,7' C.onegpu = 4 -C.size_train = (704,704) +C.size_train = (704, 704) C.init_lr = 2e-4 C.offset = True C.scale = 'hw' @@ -29,14 +28,15 @@ # get the training data cache_path = 'data/cache/widerface/train' with open(cache_path, 'rb') as fid: - train_data = cPickle.load(fid) + train_data = pickle.load(fid, encoding='latin1') num_imgs_train = len(train_data) -print 'num of training samples: {}'.format(num_imgs_train) +print('num of training samples: {}'.format(num_imgs_train)) data_gen_train = data_generators.get_data_wider(train_data, C, batchsize=batchsize) # define the base network (resnet here, can be MobileNet, etc) -if C.network=='resnet50': +if C.network == 'resnet50': from keras_csp import resnet50 as nn + weight_path = 'data/models/resnet50_weights_tf_dim_ordering_tf_kernels.h5' input_shape_img = (C.size_train[0], C.size_train[1], 3) @@ -46,15 +46,16 @@ preds_tea = nn.nn_p3p4p5(img_input, offset=C.offset, num_scale=C.num_scale, trainable=True) model = Model(img_input, preds) -if num_gpu>1: +if num_gpu > 1: from keras_csp.parallel_model import ParallelModel + model = ParallelModel(model, int(num_gpu)) model_stu = Model(img_input, preds) model_tea = Model(img_input, preds_tea) model.load_weights(weight_path, by_name=True) model_tea.load_weights(weight_path, by_name=True) -print 'load weights from {}'.format(weight_path) +print('load weights from {}'.format(weight_path)) if C.offset: out_path = 'output/valmodels/wider/%s/off' % (C.scale) @@ -62,7 +63,7 @@ out_path = 'output/valmodels/wider/%s/nooff' % (C.scale) if not os.path.exists(out_path): os.makedirs(out_path) -res_file = os.path.join(out_path,'records.txt') +res_file = os.path.join(out_path, 'records.txt') optimizer = Adam(lr=C.init_lr) if C.offset: @@ -70,18 +71,18 @@ else: model.compile(optimizer=optimizer, loss=[losses.cls_center, losses.regr_hw]) -epoch_length = int(C.iter_per_epoch/batchsize) +epoch_length = int(C.iter_per_epoch / batchsize) iter_num = 0 add_epoch = 0 losses = np.zeros((epoch_length, 3)) best_loss = np.Inf -print('Starting training with lr {} and alpha {}'.format(C.init_lr, C.alpha)) +print(('Starting training with lr {} and alpha {}'.format(C.init_lr, C.alpha))) start_time = time.time() total_loss_r, cls_loss_r1, regr_loss_r1, offset_loss_r1 = [], [], [], [] for epoch_num in range(C.num_epochs): progbar = generic_utils.Progbar(epoch_length) - print('Epoch {}/{}'.format(epoch_num + 1 + add_epoch, C.num_epochs + C.add_epoch)) + print(('Epoch {}/{}'.format(epoch_num + 1 + add_epoch, C.num_epochs + C.add_epoch))) while True: try: X, Y = next(data_gen_train) @@ -89,12 +90,13 @@ for l in model_tea.layers: weights_tea = l.get_weights() - if len(weights_tea)>0: + if len(weights_tea) > 0: if num_gpu > 1: weights_stu = model_stu.get_layer(name=l.name).get_weights() else: weights_stu = model.get_layer(name=l.name).get_weights() - weights_tea = [C.alpha*w_tea + (1-C.alpha)*w_stu for (w_tea, w_stu) in zip(weights_tea, weights_stu)] + weights_tea = [C.alpha * w_tea + (1 - C.alpha) * w_stu for (w_tea, w_stu) in + zip(weights_tea, weights_stu)] l.set_weights(weights_tea) # print loss_s1 losses[iter_num, 0] = loss_s1[1] @@ -107,30 +109,32 @@ iter_num += 1 if iter_num % 20 == 0: progbar.update(iter_num, - [('cls', np.mean(losses[:iter_num, 0])), ('regr_h', np.mean(losses[:iter_num, 1])), ('offset', np.mean(losses[:iter_num, 2]))]) + [('cls', np.mean(losses[:iter_num, 0])), ('regr_h', np.mean(losses[:iter_num, 1])), + ('offset', np.mean(losses[:iter_num, 2]))]) if iter_num == epoch_length: cls_loss1 = np.mean(losses[:, 0]) regr_loss1 = np.mean(losses[:, 1]) offset_loss1 = np.mean(losses[:, 2]) - total_loss = cls_loss1+regr_loss1+offset_loss1 + total_loss = cls_loss1 + regr_loss1 + offset_loss1 total_loss_r.append(total_loss) cls_loss_r1.append(cls_loss1) regr_loss_r1.append(regr_loss1) offset_loss_r1.append(offset_loss1) - print('Total loss: {}'.format(total_loss)) - print('Elapsed time: {}'.format(time.time() - start_time)) + print(('Total loss: {}'.format(total_loss))) + print(('Elapsed time: {}'.format(time.time() - start_time))) iter_num = 0 start_time = time.time() if total_loss < best_loss: - print('Total loss decreased from {} to {}, saving weights'.format(best_loss, total_loss)) + print(('Total loss decreased from {} to {}, saving weights'.format(best_loss, total_loss))) best_loss = total_loss - model_tea.save_weights(os.path.join(out_path, 'net_e{}_l{}.hdf5'.format(epoch_num + 1 + add_epoch, total_loss))) + model_tea.save_weights( + os.path.join(out_path, 'net_e{}_l{}.hdf5'.format(epoch_num + 1 + add_epoch, total_loss))) break except Exception as e: - print ('Exception: {}'.format(e)) + print(('Exception: {}'.format(e))) continue records = np.concatenate((np.asarray(total_loss_r).reshape((-1, 1)), np.asarray(cls_loss_r1).reshape((-1, 1)), @@ -138,4 +142,4 @@ np.asarray(offset_loss_r1).reshape((-1, 1)),), axis=-1) np.savetxt(res_file, np.array(records), fmt='%.6f') -print('Training complete, exiting.') \ No newline at end of file +print('Training complete, exiting.')