-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbbNet.cpp
More file actions
235 lines (199 loc) · 8.34 KB
/
Copy pathbbNet.cpp
File metadata and controls
235 lines (199 loc) · 8.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#include "bbNet.h"
#include "constsUtils.h"
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>
// #####################################################################
// Runs the SSD detector separately on the left and right image and appends
// the accepted detections to bbL and bbR respectively.
//
// imgL / imgR : input images, each converted to a blob by singleImgBlob().
// bbL  / bbR  : output vectors; detections are appended, existing entries
//               are left untouched.
//
// A detection row is accepted when its confidence (column 2) exceeds
// confThresh and its class id (column 1) is below 16 -- the roughly
// car/outdoor/traffic related classes.
void bbNet::detectSSD(const cv::Mat& imgL, std::vector<boundBox>& bbL,
                      const cv::Mat& imgR, std::vector<boundBox>& bbR){
    // One full forward pass for a single image; results go into `found`.
    auto detectInto = [this](const cv::Mat& frame, std::vector<boundBox>& found){
        cv::Mat inputBlob = singleImgBlob(frame);
        net.setInput(inputBlob);
        cv::Mat raw = net.forward();

        // `table` is only a 2-D view onto the memory owned by `raw`; it does
        // not own the data, so every row is consumed before `raw` goes away.
        cv::Mat table(raw.size[2], raw.size[3], CV_32F, raw.ptr<float>());

        for(int r = 0; r < table.rows; ++r){
            if(!(table.at<float>(r,2) > confThresh)){
                continue;   // not confident enough
            }
            if(!(table.at<float>(r,1) < 16)){
                continue;   // class is outside the range of interest
            }
            found.push_back(boundBox(r, frame, table));
            // shift the class id down by one, as the original code does
            found.back().class_id += -1;
        }
    };

    detectInto(imgL, bbL);
    detectInto(imgR, bbR);
}
// #####################################################################
// Runs the YOLO detector once on the left image stacked on top of the right
// image (see stackImgBlob) and splits the resulting boxes back into per-image
// lists.
//
// imgL / imgR : input images; stackImgBlob() throws std::runtime_error if
//               they cannot be stacked into a square.
// bbL  / bbR  : output vectors; detections are appended. Boxes for the right
//               image are shifted so their y coordinates are relative to imgR.
//
// Throws std::runtime_error when the network output does not look like a
// contiguous float tensor whose last dimension is (x, y, w, h, objectness,
// class scores...).
//
// The output is expected in the YOLOv5 layout, i.e. one detection per row.
// The number of rows and the row stride are read from the tensor itself, so
// a different input resolution or class count no longer walks off the end of
// the buffer.
void bbNet::detectYOLO(const cv::Mat& imgL, std::vector<boundBox>& bbL,
                       const cv::Mat& imgR, std::vector<boundBox>& bbR){
    cv::Mat blob = stackImgBlob(imgL,imgR);
    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    if(outputs.empty() || outputs[0].empty()){
        throw std::runtime_error("YOLO network produced no output");
    }
    cv::Mat& out = outputs[0];
    if(out.dims < 2 || out.depth() != CV_32F || !out.isContinuous()){
        throw std::runtime_error("Unexpected YOLO output tensor (need contiguous float data with >= 2 dims)");
    }
    // Last dimension = values per detection, everything before it = detections.
    const int dimensions = out.size[out.dims - 1];
    if(dimensions <= 5){
        throw std::runtime_error("Unexpected YOLO output tensor (row too short for box + class scores)");
    }
    const int rows = static_cast<int>(out.total() / dimensions);
    // Never read more class scores than a row actually holds, even if the
    // label list is longer than the network's class count.
    const int numClasses = std::min(static_cast<int>(labels.size()), dimensions - 5);

    // The stacked image is (cols x cols) and is shrunk to the blob's width
    // for detection; blob is NCHW, so size[3] is that width.
    const float scale_factor = static_cast<float>(imgL.cols / static_cast<double>(blob.size[3]));

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    float* data = out.ptr<float>();
    for(int i = 0; i < rows; ++i, data += dimensions){
        const float confidence = data[4];
        if(!(confidence >= confThresh)){
            continue;
        }
        // Wrap the class scores of this row (no copy) to find the best class.
        cv::Mat scores(1, numClasses, CV_32FC1, data + 5);
        cv::Point class_id;
        double max_class_score;
        cv::minMaxLoc(scores, nullptr, &max_class_score, nullptr, &class_id);
        if(!(max_class_score > scoreThresh)){
            continue;
        }
        confidences.push_back(confidence);
        class_ids.push_back(class_id.x);

        // Row stores centre x/y plus width/height in blob pixels; convert to
        // a top-left anchored rectangle in stacked-image pixels.
        const float x = data[0];
        const float y = data[1];
        const float w = data[2];
        const float h = data[3];
        const int left   = int((x - 0.5 * w) * scale_factor);
        const int top    = int((y - 0.5 * h) * scale_factor);
        const int width  = int(w * scale_factor);
        const int height = int(h * scale_factor);
        boxes.push_back(cv::Rect(left, top, width, height));
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, scoreThresh, nmsThresh, nms_result);

    // Geometry of the stacked image: imgL on top, then `gap` padding rows,
    // then imgR starting at row rOffset.
    const int rowsL = imgL.rows;
    const int gap = imgL.cols - (rowsL + imgR.rows);
    const int rOffset = rowsL + gap;

    for(std::size_t i = 0; i < nms_result.size(); ++i){
        const int idx = nms_result[i];
        const cv::Rect& box = boxes[idx];
        const float conf = confidences[idx];
        const int x0 = box.x;
        int y0 = box.y;
        const int x1 = box.x + box.width;
        int y1 = box.y + box.height;
        const int class_ID = class_ids[idx];
        const int id = static_cast<int>(i);   // position in the NMS result

        if(y1 < rOffset){
            // Bottom of the box is above the second image.
            if(y0 >= rowsL){
                // The whole box sits in the padding gap: clamping it would
                // yield an inverted/empty box, so drop it.
                continue;
            }
            // Belongs to the left image, but must not extend into the gap.
            y1 = std::min(y1,rowsL);
            bbL.push_back(boundBox(conf,x0,y0,x1,y1,class_ID,id));
        }else if(y0 > rowsL){
            // Top of the box is below the first image: belongs to the right
            // image, but must not start inside the gap.
            y0 = std::max(y0,rOffset);
            bbR.push_back(boundBox(conf,x0,y0-rOffset,x1,y1-rOffset,class_ID,id));
        }
        // Otherwise the box spans both images, which is not a real region
        // of either picture; it is discarded.
    }
}
// #####################################################################
// Builds a single network input blob from two images by placing imgL at the
// top and imgR at the bottom of a black square canvas (side = image width);
// whatever rows are left over form an empty band between the two.
//
// Throws std::runtime_error if the images differ in width, or if there is no
// spare row left between them (width <= combined height).
//
// Returns a blob sized 300x300 (mean of imgL subtracted) when netChoice is 0,
// otherwise a 320x320 blob scaled by 1/255.
cv::Mat bbNet::stackImgBlob(const cv::Mat& imgL,const cv::Mat& imgR) const {
    const int side = imgL.cols;
    if(side != imgR.cols){
        throw std::runtime_error("Cannot stack images, different column numbers!");
    }

    const int topRows = imgL.rows;
    const int bottomRows = imgR.rows;
    const int spareRows = side - (topRows + bottomRows);
    if(spareRows <= 0){
        throw std::runtime_error("Cannot stack images, cols <= 2*rows (too tall for square stacking)");
    }

    cv::Mat canvas = cv::Mat::zeros(side, side, CV_8UC3);
    const cv::Rect topRegion(0, 0, side, topRows);
    const cv::Rect bottomRegion(0, topRows + spareRows, side, bottomRows);
    imgL.copyTo(canvas(topRegion));
    imgR.copyTo(canvas(bottomRegion));

    if(netChoice != 0){
        // YOLOv5 net
        return cv::dnn::blobFromImage(canvas, 1./255., cv::Size(320,320), cv::Scalar(), true, false);
    }
    // SSD net
    return cv::dnn::blobFromImage(canvas, 1.0, cv::Size(300, 300), cv::mean(imgL), true, false);
}
// #####################################################################
// Placeholder for building a batched blob from both images: not implemented.
// Both arguments are ignored and a default-constructed (empty) cv::Mat is
// returned.
cv::Mat bbNet::batchImgBlob(const cv::Mat& imgL,const cv::Mat& imgR) const {
    cv::Mat notImplemented;
    return notImplemented;
}
// #####################################################################
// Converts one image into a network input blob.
//
// netChoice == 0 (SSD): the image is resized to 300x300 with its own mean
// subtracted.
// Any other netChoice (YOLOv5): the image is first copied into the top-left
// corner of a black square canvas whose side is the larger image dimension,
// then resized to 320x320 and scaled by 1/255.
cv::Mat bbNet::singleImgBlob(const cv::Mat& img) const {
    if(netChoice == 0){
        //SSD Net
        return cv::dnn::blobFromImage(img, 1.0, cv::Size(300, 300), cv::mean(img), true, false);
    }

    //Yolov5 Net
    const int side = std::max(img.rows, img.cols);
    cv::Mat padded = cv::Mat::zeros(side, side, CV_8UC3);
    // cv::Rect takes (x, y, width, height): width is the column count and
    // height is the row count. With the two swapped, the ROI of a non-square
    // image has the wrong shape, so copyTo() cannot write into the canvas.
    img.copyTo(padded(cv::Rect(0, 0, img.cols, img.rows)));
    return cv::dnn::blobFromImage(padded, 1./255., cv::Size(320,320), cv::Scalar(), true, false);
}
// #####################################################################
// Selects the detection network and loads its model and class-label list.
//
// netChoice : 0  -> SSD MobileNet v2 (TensorFlow graph + config)
//             1  -> YOLOv5 Nano,  2 -> YOLOv5 Small,  3 -> YOLOv5 Medium
//             99 -> "The uN-Net": only the name is set; no model or labels
//                   are loaded.
//             Any other value leaves the previously loaded net and name in
//             place and still (re)loads the YOLO label file.
//
// Model files are looked up in "nets/" and, failing that, "../nets/".
// Throws std::runtime_error if neither directory is found (file_exists is
// a helper defined outside this file section).
void bbNet::set_netChoice(int netChoice){
    std::string dir("nets/");
    if(!file_exists(dir)){
        dir = "../"+dir;
    }
    if(!file_exists(dir)){
        throw std::runtime_error("Couldn't find ML networks, check this and the above directory!");
    }

    this->netChoice = netChoice;
    switch(netChoice){
        case 0:
            net = cv::dnn::readNet(dir+"frozen_inference_graph.pb",
                                   dir+"ssd_mobilenet_v2_coco_2018_03_29.txt","TensorFlow");
            name = std::string("SSD MobileNet v2.0");
            break;
        case 1:
            net = cv::dnn::readNet(dir+"yolov5n_320.onnx");
            name = std::string("YOLOv5 Nano");
            break;
        case 2:
            net = cv::dnn::readNet(dir+"yolov5s_320.onnx");
            name = std::string("YOLOv5 Small");
            break;
        case 3:
            net = cv::dnn::readNet(dir+"yolov5m_320.onnx");
            name = std::string("YOLOv5 Medium");
            break;
        case 99:
            name = std::string("The uN-Net");
            return;
    }

    // Start from an empty list: without this, switching networks (or calling
    // this twice) would keep appending, leaving labels.size() larger than the
    // real class count.
    labels.clear();

    std::ifstream ifs(dir + (netChoice == 0 ? "ssdClasses.txt" : "yoloClasses.txt"));
    // One label per line; if the file cannot be opened the list stays empty.
    std::string line;
    while(std::getline(ifs, line)){
        labels.push_back(line);
    }
}
// #####################################################################