GTAV_ObjectDetection/ObjectDetection.h at master · Inha-HCI/GTAV_ObjectDetection · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#pragma once

#include <stdlib.h>
#include <ctime>

#include "LiDAR.h"
#include "Functions.h"
#include "CamParams.h"
#include "FrameObjectInfo.h"
#include <opencv2\opencv.hpp>
#include <boost/shared_ptr.hpp>

//#define DEBUG 1

struct WorldObject {
    Entity e;
    std::string type;
    Hash model;
};

static Vector3 createVec3(float x, float y, float z) {
    Vector3 vec;
    vec.x = x;
    vec.y = y;
    vec.z = z;
    return vec;
}

struct SubsetInfo {
    Vector3 kittiPos;
    float kittiHeight;
    float kittiWidth;
    float kittiLength;
    float beta_kitti;
    float rot_y;
    float alpha_kitti;
};

class ObjectDetection {
public:
    ObjectDetection();
    ~ObjectDetection();
private:
    FrameObjectInfo m_curFrame;
    bool m_initialized = false;
    bool m_eve = false;

    Vehicle m_vehicle = NULL;
    Vehicle m_ownVehicle = NULL;
    Player player = NULL;
    Ped ped = NULL;
    Cam camera = NULL;
    Vector3 dir;

    float x, y, z;
    float startHeading;
    int hour, minute;
    const char* _weather;
    const char* _vehicle;

    bool pointclouds;

    std::string baseFolder;

    float currentThrottle = 0.0;
    float currentBrake = 0.0;
    float currentSteering = 0.0;

    Vector3 currentPos;
    Vector3 vehicleForwardVector;
    Vector3 vehicleUpVector;
    Vector3 vehicleRightVector;

    Vector3 m_camForwardVector;
    Vector3 m_camRightVector;
    Vector3 m_camUpVector;

    bool running = false;

    int m_startArea = 1; //Downtown (see s_locationBounds)
    std::vector<std::vector<char>> m_polyGrid;

    //LiDAR variables
    LiDAR lidar;
    bool lidar_initialized = false;
    int pointCloudSize = 0;
    std::unordered_map<int, HitLidarEntity*> m_entitiesHit;
    int lidar_param = 7;

    //Perspective variables
    int m_vPerspective = -1;//Entity ID of perspective vehicle (-1 if self)

    //Depth Map variables
    float* m_pDepth = NULL;
    uint8_t* m_pStencil = NULL;
    float* m_pDMPointClouds = NULL;
    uint16_t* m_pDMImage = NULL;
    uint8_t* m_pStencilImage = NULL;
    unsigned char* color_buf = NULL;
    uint8_t* m_pStencilSeg = NULL;
    int m_stencilSegLength = 0;
    uint32_t* m_pInstanceSeg = NULL;
    int m_instanceSegLength = 0;
    uint8_t* m_pInstanceSegImg = NULL;
    int m_instanceSegImgLength = 0;
    uint8_t* m_pOcclusionImage = NULL;
    uint8_t* m_pUnusedStencilImage = NULL;
    uint8_t* m_pGroundPointsImage = NULL;

    cv::Mat m_depthMat = cv::Mat::zeros(cv::Size(s_camParams.width, s_camParams.height), CV_32FC1);

    std::string m_imgFilename;
    std::string m_veloFilename;
    std::string m_depthFilename;
    std::string m_depthPCFilename;
    std::string m_depthImgFilename;
    std::string m_stencilFilename;
    std::string m_stencilImgFilename;
    std::string m_segImgFilename;
    std::string m_occImgFilename;
    std::string m_unusedPixelsFilename;
    std::string m_calibFilename;
    std::string m_labelsFilename;
    std::string m_labelsUnprocessedFilename;
    std::string m_labelsAugFilename;
    std::string m_groundPointsFilename;
    std::string m_instSegFilename;
    std::string m_instSegImgFilename;
    std::string m_posFilename;
    std::string m_egoObjectFilename;

    std::string m_veloFilenameU;
    std::string m_depthPCFilenameU;

    bool vehicles_created = false;
    std::vector<VehicleToCreate> vehiclesToCreate;
    std::vector<PedToCreate> pedsToCreate;

    //For tracking: first frame in a series that an entity appears
    std::unordered_map<int, int> trackFirstFrame;
    //For calculating real speed
    Vector3 m_trackLastPos;
    float m_trackRealSpeed;
    float m_trackLastRealSpeed;
    float m_trackDist;
    float m_trackDistVar;
    float m_trackDistErrorTotal;
    float m_trackDistErrorTotalVar;
    float m_trackDistErrorTotalCount;
    int m_trackLastIndex = -1;
    int m_trackLastSeqIndex = 0;
    std::string m_timeTrackFile;
    std::string m_usedPixelFile;

    //Camera intrinsic parameters
    float intrinsics[3];

    bool m_vLookupInit = false;
    std::unordered_map<std::string, std::string> m_vLookup; //Vehicle lookup

    std::unordered_map<Vehicle, std::vector<Ped>> m_pedsInVehicles;

    //Map for tracking which entities are possible for each point which is in multiple 3D boxes
    std::unordered_map<int, std::vector<ObjEntity*>> m_overlappingPoints;

public:
    void initCollection(UINT camWidth, UINT camHeight, bool exportEVE = true, int startIndex = 0);
    void setCamParams(float* forwardVec = NULL, float* rightVec = NULL, float* upVec = NULL);
    void setOwnVehicleObject();

    FrameObjectInfo setDepthAndStencil(bool prevDepth = false, float* pDepth = NULL, uint8_t* pStencil = NULL);
    //Depth buffer fn/var needs to be accessed by server
    void setDepthBuffer(bool prevDepth = false);
    bool m_prevDepth = false;

    FrameObjectInfo generateMessage(float* pDepth, uint8_t* pStencil, int entityID = -1);
    void exportDetections(FrameObjectInfo fObjInfo, ObjEntity* vPerspective = NULL);
    void exportImage(BYTE* data, std::string filename = "");
    void increaseIndex();
    std::string getStandardFilename(std::string subDir, std::string extension);

    int instance_index = 0;
    int series_index = 0;
    std::string series_string = "0000";
    std::string instance_string;
    int baseTrackingIndex = instance_index;

    //Tracking variables
    bool collectTracking;
    //# of instances in one series
    const int trSeriesLength = 500;
    //# of seconds between series
    const int trSeriesGapTime = 30;
    //Used for keeing track of when to add the gap
    bool trSeriesGap = false;

    //Other vehicle detection labels
    std::vector<ObjEntity> m_nearbyVehicles;
    ObjEntity m_ownVehicleObj;

private:
    void setVehiclesList();
    void setPedsList();
    void setSpeed();
    void setYawRate();
    void setTime();
    void setupLiDAR();
    void collectLiDAR();
    void setIndex();
    void calcCameraIntrinsics();
    void setFocalLength();
    bool getEntityVector(ObjEntity &entity, int entityID, Hash model, int classid, std::string type, std::string modelString, bool isPedInV, Vehicle vPedIsIn, bool &nearbyVehicle);
    void setPosition();
    Vector3 correctOffcenter(Vector3 position, Vector3 min, Vector3 max, Vector3 forwardVector, Vector3 rightVector, Vector3 upVector, Vector3 &offcenter);
    float observationAngle(Vector3 position);
    void drawVectorFromPosition(Vector3 vector, int blue, int green);
    Vector3 depthToCamCoords(float depth, float screenX, float screenY);
    void outputRealSpeed();
    void setStencilBuffer();
    void setFilenames();

    BBox2D BBox2DFrom3DObject(Vector3 position, Vector3 dim, Vector3 forwardVector, Vector3 rightVector, Vector3 upVector, bool &success, float &truncation);
    bool in3DBox(Vector3 point, Vector3 objPos, Vector3 dim, Vector3 yVector, Vector3 xVector, Vector3 zVector);
    bool in3DBox(ObjEntity *e, Vector3 point, bool &upperHalf);
    bool in2DBoxUnprocessed(const int &i, const int &j, ObjEntity* e);
    bool checkDirection(Vector3 unit, Vector3 point, Vector3 min, Vector3 max);
    //Process pixel instance segmentations with two different methods
    //2D uses only 2D segmentation techniques whereas 3D uses depth buffer
    //Depth buffer hits vehicle windows whereas stencil buffer does not
    void processSegmentation2D();
    void processSegmentation3D();
    std::vector<ObjEntity*> pointInside3DEntities(const Vector3 &worldPos, EntityMap* eMap, const bool &checkUpperVehicle, const uint8_t &stencilVal);
    void processOverlappingPoints();
    void setEntityBBoxParameters(ObjEntity *e);
    void processStencilPixel3D(const uint8_t &stencilVal, const int &j, const int &i, const Vector3 &xVectorCam, const Vector3 &yVectorCam, const Vector3 &zVectorCam);
    void addSegmentedPoint3D(int i, int j, ObjEntity *e);
    void addPointToSegImages(int i, int j, int entityID);
    void printSegImage();

    void update3DPointsHit();
    void update3DPointsHit(ObjEntity* e);

    void processOcclusion();
    void processOcclusionForEntity(ObjEntity *e, const Vector3 &xVectorCam, const Vector3 &yVectorCam, const Vector3 &zVectorCam);

    void getRollAndPitch(Vector3 rightVector, Vector3 forwardVector, Vector3 upVector, float &pitch, float &roll);

    bool hasLOSToEntity(Entity entityID, Vector3 position, Vector3 dim, Vector3 forwardVector, Vector3 rightVector, Vector3 upVector, bool useOrigin = false, Vector3 origin = createVec3(0,0,0));

    void initVehicleLookup();
    bool isPointOccluding(Vector3 worldPos, ObjEntity* e);
    void outputOcclusion();
    void outputUnusedStencilPixels();

    //Export functions
    void exportEntity(ObjEntity e, std::ostringstream& oss, bool unprocessed, bool augmented, bool checkbbox2d = true, const int &maxDist = -1, const int &min2DPoints = -1, const int &min3DPoints = -1);
    void exportEntities(EntityMap entMap, std::ostringstream& oss, bool unprocessed = false, bool augmented = false, bool checkbbox2d = true, const int &maxDist = -1, const int &min2DPoints = -1, const int &min3DPoints = -1);
    void exportCalib();
    void exportPosition();
    void exportEgoObject(ObjEntity vPerspective);

    //Ground plane points
    Vector3 getGroundPoint(Vector3 point, Vector3 yVectorCam, Vector3 xVectorCam, Vector3 zVectorCam);
    void setGroundPlanePoints();

    //Other vehicle detections
    void checkEntity(Vehicle p, WorldObject e, Vector3 pPos, std::ostringstream& oss);
    SubsetInfo getObjectInfoSubset(Vector3 position, Vector3 forwardVector, Vector3 dim);
    Vector3 getVehicleDims(Entity e, Hash model, Vector3 &min, Vector3 &max);
};