From cc08a8a7857551bc8cb7e3a86a5b516a815ccccc Mon Sep 17 00:00:00 2001
From: Robert Chisholm
Date: Thu, 5 Oct 2023 11:33:43 +0100
Subject: [PATCH] Edges are now defined as ID on the host, but used as index
 on device.

Graph traversal tests still failing, rest pass
---
 .../CUDAEnvironmentDirectedGraphBuffers.cu | 153 ++++++++++--------
 .../test_environment_directed_graph.cu     |  23 +--
 2 files changed, 99 insertions(+), 77 deletions(-)

diff --git a/src/flamegpu/simulation/detail/CUDAEnvironmentDirectedGraphBuffers.cu b/src/flamegpu/simulation/detail/CUDAEnvironmentDirectedGraphBuffers.cu
index 4249c83e8..3e5ef8aca 100644
--- a/src/flamegpu/simulation/detail/CUDAEnvironmentDirectedGraphBuffers.cu
+++ b/src/flamegpu/simulation/detail/CUDAEnvironmentDirectedGraphBuffers.cu
@@ -235,21 +235,22 @@ id_t* CUDAEnvironmentDirectedGraphBuffers::getVertexIDBuffer(const cudaStream_t
     return getVertexPropertyBuffer(ID_VARIABLE_NAME, element_ct, stream);
 }
-__global__ void fillKVPairs(uint64_t *keys, uint32_t *vals, const unsigned int *srcdest, unsigned int count) {
+__global__ void fillKVPairs(uint32_t *keys, uint32_t *vals, const unsigned int *srcdest, unsigned int count, const unsigned int *idMap, const unsigned int id_offset) {
     unsigned int index = (blockIdx.x * blockDim.x) + threadIdx.x;
     if (index < count) {
         // To subsort by destination too, we treat the pair of uint32 as a uint64
-        keys[index] = reinterpret_cast<const uint64_t*>(srcdest)[index];
+        keys[index * 2 + 0] = idMap[srcdest[index * 2 + 0] - id_offset];
+        keys[index * 2 + 1] = idMap[srcdest[index * 2 + 1] - id_offset];
         vals[index] = index;
     }
 }
-__global__ void fillKVPairs_inverted(uint32_t* keys, uint32_t* vals, const unsigned int* srcdest, unsigned int count) {
+__global__ void fillKVPairs_inverted(uint32_t* keys, uint32_t* vals, const unsigned int* srcdest, unsigned int count, const unsigned int *idMap, const unsigned int id_offset) {
     unsigned int index = (blockIdx.x * blockDim.x) + threadIdx.x;
     if (index < count) {
         // To subsort by destination too, we treat the pair of uint32 as a uint64
         // To invert we must switch the order of the contained uint32's
-        keys[index * 2 + 0] = srcdest[index * 2 + 1];
-        keys[index * 2 + 1] = srcdest[index * 2 + 0];
+        keys[index * 2 + 0] = idMap[srcdest[index * 2 + 1] - id_offset];
+        keys[index * 2 + 1] = idMap[srcdest[index * 2 + 0] - id_offset];
         vals[index] = index;
     }
 }
@@ -317,7 +318,7 @@ __global__ void buildIDMap(const id_t *IDsIn, unsigned int *indexOut, const unsi
         }
     }
 }
-__global__ void validateSrcDest(const id_t *edgeSrcDest, unsigned int *idMap, const unsigned int edge_count, unsigned int *errors, unsigned int vertex_id_min, unsigned int vertex_id_max) {
+__global__ void validateSrcDest(id_t *edgeSrcDest, unsigned int *idMap, const unsigned int edge_count, unsigned int *errors, unsigned int vertex_id_min, unsigned int vertex_id_max) {
     const unsigned int thread_index = blockIdx.x * blockDim.x + threadIdx.x;
     if (thread_index < edge_count) {
         const id_t my_src_id = edgeSrcDest[thread_index * 2 + 1];
@@ -342,6 +343,17 @@ __global__ void validateSrcDest(const id_t *edgeSrcDest, unsigned int *idMap, co
         }
     }
 }
+__global__ void translateSrcDest(id_t *edgeSrcDest, unsigned int *idMap, const unsigned int edge_count, unsigned int *errors, unsigned int vertex_id_min, unsigned int vertex_id_max) {
+    const unsigned int thread_index = blockIdx.x * blockDim.x + threadIdx.x;
+    if (thread_index < edge_count) {
+        const id_t my_src_id = edgeSrcDest[thread_index * 2 + 1];
+        const id_t my_dest_id = edgeSrcDest[thread_index * 2 + 0];
+        const unsigned int src_id = idMap[my_src_id - vertex_id_min];
+        const unsigned int dest_id = idMap[my_dest_id - vertex_id_min];
+        edgeSrcDest[thread_index * 2 + 1] = src_id;
+        edgeSrcDest[thread_index * 2 + 0] = dest_id;
+    }
+}
 void CUDAEnvironmentDirectedGraphBuffers::syncDevice_async(detail::CUDAScatter& scatter, const unsigned int streamID, const cudaStream_t stream) {
     // Copy variable buffers to device
     if (vertex_count) {
@@ -364,61 +376,6 @@ void CUDAEnvironmentDirectedGraphBuffers::syncDevice_async(detail::CUDAScatter&
     }
     if (vertex_count && edge_count && requires_rebuild) {
         {  // Rebuild the CSR/VBM (edgesLeaving())
-            // Fill Key/Val Pairs
-            int blockSize;  // The launch configurator returned block size
-            gpuErrchk(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blockSize, fillKVPairs, 32, 0));  // Randomly 32
-            int gridSize = (edge_count + blockSize - 1) / blockSize;  // Round up according to array size
-            fillKVPairs<<<gridSize, blockSize, 0, stream>>>(d_keys, d_vals, static_cast<unsigned int*>(edge_buffers.at(GRAPH_SOURCE_DEST_VARIABLE_NAME).d_ptr), edge_count);
-            gpuErrchkLaunch();
-            // Sort Key/Val Pairs according to src->dest
-            auto &cub_temp = scatter.CubTemp(streamID);
-            size_t temp_req = 0;
-            gpuErrchk(cub::DeviceRadixSort::SortPairs(nullptr, temp_req, d_keys, d_keys_swap, d_vals, d_vals_swap, edge_count, 0, sizeof(uint64_t) * 8, stream));
-            cub_temp.resize(temp_req);
-            gpuErrchk(cub::DeviceRadixSort::SortPairs(cub_temp.getPtr(), cub_temp.getSize(), d_keys, d_keys_swap, d_vals, d_vals_swap, edge_count, 0, sizeof(uint64_t) * 8, stream));
-            // Build PBM (For vertices with edges)
-            gpuErrchk(cudaMemset(d_pbm, 0xffffffff, (vertex_count + 1) * sizeof(unsigned int)));
-            gpuErrchk(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blockSize, findBinStart, 32, 0));  // Randomly 32
-            gridSize = (edge_count + blockSize - 1) / blockSize;  // Round up according to array size
-            findBinStart<<<gridSize, blockSize, 0, stream>>>(d_pbm, d_keys_swap, edge_count, vertex_count);
-            gpuErrchkLaunch();
-            // Build PBM (Fill vertices with no edges)
-            temp_req = 0;
-            gpuErrchk(cub::DeviceScan::InclusiveScan(nullptr, temp_req, ReverseIterator(d_pbm + vertex_count), ReverseIterator(d_pbm_swap + vertex_count), CustomMin(), vertex_count + 1, stream));
-            cub_temp.resize(temp_req);
-            gpuErrchk(cub::DeviceScan::InclusiveScan(cub_temp.getPtr(), cub_temp.getSize(), ReverseIterator(d_pbm + vertex_count), ReverseIterator(d_pbm_swap + vertex_count), CustomMin(), vertex_count + 1, stream));
-            // Sort edge variables
-            std::vector<detail::CUDAScatter::ScatterData> sd;
-            for (auto &edge : edge_buffers) {
-                edge.second.swap();
-                sd.push_back(detail::CUDAScatter::ScatterData{edge.second.element_size, reinterpret_cast<char*>(edge.second.d_ptr_swap), reinterpret_cast<char*>(edge.second.d_ptr)});
-            }
-            scatter.scatterPosition_async(streamID, stream, d_vals_swap, sd, edge_count);
-            // Swap all the swap pointers, so the junk data is in swap
-            std::swap(d_keys, d_keys_swap);
-            std::swap(d_vals, d_vals_swap);
-            std::swap(d_pbm, d_pbm_swap);
-            // Update which buffers curve points to
-            for (auto& e : graph_description.edgeProperties) {
-                auto& eb = edge_buffers.at(e.first);
-                for (const auto& _curve : curve_instances) {
-                    if (const auto curve = _curve.lock())
-                        curve->setEnvironmentDirectedGraphEdgeProperty(graph_description.name, e.first, eb.d_ptr, edge_count);
-                }
-                for (const auto& _curve : rtc_curve_instances) {
-                    if (const auto curve = _curve.lock())
-                        memcpy(curve->getEnvironmentDirectedGraphEdgePropertyCachePtr(graph_description.name, e.first), &eb.d_ptr, sizeof(void*));
-                }
-                eb.ready = Buffer::Device;
-            }
-            for (const auto& _curve : curve_instances) {
-                if (const auto curve = _curve.lock())
-                    curve->setEnvironmentDirectedGraphVertexProperty(graph_description.name, GRAPH_VERTEX_PBM_VARIABLE_NAME, d_pbm, 1);
-            }
-            for (const auto& _curve : rtc_curve_instances) {
-                if (const auto curve = _curve.lock())
-                    memcpy(curve->getEnvironmentDirectedGraphVertexPropertyCachePtr(graph_description.name, GRAPH_VERTEX_PBM_VARIABLE_NAME), &d_pbm, sizeof(void*));
-            }
             // Construct the vertex ID : index map
             {
                 if (vertex_id_min == std::numeric_limits<unsigned int>::max() || vertex_id_max == std::numeric_limits<unsigned int>::min()) {
@@ -455,7 +412,7 @@ void CUDAEnvironmentDirectedGraphBuffers::syncDevice_async(detail::CUDAScatter&
                 const unsigned int BLOCK_CT = static_cast<unsigned int>(ceil(vertex_count / static_cast<float>(BLOCK_SZ)));
                 buildIDMap<<<BLOCK_CT, BLOCK_SZ, 0, stream>>>(static_cast<id_t*>(v_id_b.d_ptr), d_vertex_index_map, vertex_count, d_pbm_swap, vertex_id_min, vertex_id_max);
                 gpuErrchkLaunch()
-                unsigned int err_collision_range[3];
+                unsigned int err_collision_range[3];
                 gpuErrchk(cudaMemcpyAsync(err_collision_range, d_pbm_swap, 3 * sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
                 gpuErrchk(cudaStreamSynchronize(stream));
                 if (err_collision_range[2] > 0) {
@@ -466,14 +423,14 @@ void CUDAEnvironmentDirectedGraphBuffers::syncDevice_async(detail::CUDAScatter&
                     THROW flamegpu::exception::UnknownInternalError("Graph contains invalid vertex IDs, %u vertices reported an ID that does not satisfy %u < ID < %u, in CUDAEnvironmentDirectedGraphBuffers::syncDevice_async()", err_collision_range[1], vertex_id_min, vertex_id_max);
                 }
             }
-            {  // Validate that all edge source/dest pairs correspond to valid IDs.
+            {  // Validate that edge source/dest pairs correspond to valid IDs
                 const auto& e_srcdest_b = edge_buffers.at(GRAPH_SOURCE_DEST_VARIABLE_NAME);
                 gpuErrchk(cudaMemsetAsync(d_pbm_swap, 0, 4 * sizeof(unsigned int), stream));  // We will use spare pbm_swap to count errors, save allocating more memory
                 const unsigned int BLOCK_SZ = 512;
                 const unsigned int BLOCK_CT = static_cast<unsigned int>(ceil(edge_count / static_cast<float>(BLOCK_SZ)));
                 validateSrcDest<<<BLOCK_CT, BLOCK_SZ, 0, stream>>>(static_cast<id_t*>(e_srcdest_b.d_ptr), d_vertex_index_map, edge_count, d_pbm_swap, vertex_id_min, vertex_id_max);
                 gpuErrchkLaunch()
-                unsigned int err_collision_range[4];  // {src_notset, dest_notset, src_invalid, dest_invalid}
+                unsigned int err_collision_range[4];  // {src_notset, dest_notset, src_invalid, dest_invalid}
                 gpuErrchk(cudaMemcpyAsync(err_collision_range, d_pbm_swap, 4 * sizeof(unsigned int), cudaMemcpyDeviceToHost, stream));
                 gpuErrchk(cudaStreamSynchronize(stream));
                 if (err_collision_range[0] > 0 || err_collision_range[1] > 0) {
@@ -483,13 +440,68 @@ void CUDAEnvironmentDirectedGraphBuffers::syncDevice_async(detail::CUDAScatter&
                     }
                 }
             }
-            // @TODO When is best time to copy edge buffers back to host after sort?
+            // Fill Key/Val Pairs
+            int blockSize;  // The launch configurator returned block size
+            gpuErrchk(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blockSize, fillKVPairs, 32, 0));  // Randomly 32
+            int gridSize = (edge_count + blockSize - 1) / blockSize;  // Round up according to array size
+            fillKVPairs<<<gridSize, blockSize, 0, stream>>>(reinterpret_cast<uint32_t*>(d_keys), d_vals, static_cast<unsigned int*>(edge_buffers.at(GRAPH_SOURCE_DEST_VARIABLE_NAME).d_ptr), edge_count, d_vertex_index_map, vertex_id_min);
+            gpuErrchkLaunch();
+            // Sort Key/Val Pairs according to src->dest
+            auto &cub_temp = scatter.CubTemp(streamID);
+            size_t temp_req = 0;
+            gpuErrchk(cub::DeviceRadixSort::SortPairs(nullptr, temp_req, d_keys, d_keys_swap, d_vals, d_vals_swap, edge_count, 0, sizeof(uint64_t) * 8, stream));
+            cub_temp.resize(temp_req);
+            gpuErrchk(cub::DeviceRadixSort::SortPairs(cub_temp.getPtr(), cub_temp.getSize(), d_keys, d_keys_swap, d_vals, d_vals_swap, edge_count, 0, sizeof(uint64_t) * 8, stream));
+            // Build PBM (For vertices with edges)
+            gpuErrchk(cudaMemset(d_pbm, 0xffffffff, (vertex_count + 1) * sizeof(unsigned int)));
+            gpuErrchk(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blockSize, findBinStart, 32, 0));  // Randomly 32
+            gridSize = (edge_count + blockSize - 1) / blockSize;  // Round up according to array size
+            findBinStart<<<gridSize, blockSize, 0, stream>>>(d_pbm, d_keys_swap, edge_count, vertex_count);
+            gpuErrchkLaunch();
+            // Build PBM (Fill vertices with no edges)
+            temp_req = 0;
+            gpuErrchk(cub::DeviceScan::InclusiveScan(nullptr, temp_req, ReverseIterator(d_pbm + vertex_count), ReverseIterator(d_pbm_swap + vertex_count), CustomMin(), vertex_count + 1, stream));
+            cub_temp.resize(temp_req);
+            gpuErrchk(cub::DeviceScan::InclusiveScan(cub_temp.getPtr(), cub_temp.getSize(), ReverseIterator(d_pbm + vertex_count), ReverseIterator(d_pbm_swap + vertex_count), CustomMin(), vertex_count + 1, stream));
+            // Sort edge variables
+            std::vector<detail::CUDAScatter::ScatterData> sd;
+            for (auto &edge : edge_buffers) {
+                edge.second.swap();
+                sd.push_back(detail::CUDAScatter::ScatterData{edge.second.element_size, reinterpret_cast<char*>(edge.second.d_ptr_swap), reinterpret_cast<char*>(edge.second.d_ptr)});
+            }
+            scatter.scatterPosition_async(streamID, stream, d_vals_swap, sd, edge_count);
+            // Swap all the swap pointers, so the junk data is in swap
+            std::swap(d_keys, d_keys_swap);
+            std::swap(d_vals, d_vals_swap);
+            std::swap(d_pbm, d_pbm_swap);
+            // Update which buffers curve points to
+            for (auto& e : graph_description.edgeProperties) {
+                auto& eb = edge_buffers.at(e.first);
+                for (const auto& _curve : curve_instances) {
+                    if (const auto curve = _curve.lock())
+                        curve->setEnvironmentDirectedGraphEdgeProperty(graph_description.name, e.first, eb.d_ptr, edge_count);
+                }
+                for (const auto& _curve : rtc_curve_instances) {
+                    if (const auto curve = _curve.lock())
+                        memcpy(curve->getEnvironmentDirectedGraphEdgePropertyCachePtr(graph_description.name, e.first), &eb.d_ptr, sizeof(void*));
+                }
+                eb.ready = Buffer::Device;
+            }
+            for (const auto& _curve : curve_instances) {
+                if (const auto curve = _curve.lock())
+                    curve->setEnvironmentDirectedGraphVertexProperty(graph_description.name, GRAPH_VERTEX_PBM_VARIABLE_NAME, d_pbm, 1);
+            }
+            for (const auto& _curve : rtc_curve_instances) {
+                if (const auto curve = _curve.lock())
+                    memcpy(curve->getEnvironmentDirectedGraphVertexPropertyCachePtr(graph_description.name, GRAPH_VERTEX_PBM_VARIABLE_NAME), &d_pbm, sizeof(void*));
+            }
+            // @todo When is best time to copy edge buffers back to host after sort?
         }
         {  // Rebuild the CSC/Inverted VBM (edgesJoining())
             int blockSize;  // The launch configurator returned block size
             gpuErrchk(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&blockSize, fillKVPairs, 32, 0));  // Randomly 32
             int gridSize = (edge_count + blockSize - 1) / blockSize;  // Round up according to array size
-            fillKVPairs_inverted<<<gridSize, blockSize, 0, stream>>>(reinterpret_cast<uint32_t*>(d_keys), d_vals, static_cast<unsigned int*>(edge_buffers.at(GRAPH_SOURCE_DEST_VARIABLE_NAME).d_ptr), edge_count);
+            fillKVPairs_inverted<<<gridSize, blockSize, 0, stream>>>(reinterpret_cast<uint32_t*>(d_keys), d_vals, static_cast<unsigned int*>(edge_buffers.at(GRAPH_SOURCE_DEST_VARIABLE_NAME).d_ptr), edge_count, d_vertex_index_map, vertex_id_min);
             gpuErrchkLaunch();
             // Sort Key/Val Pairs according to dest->src
             // Cub temp has already been resized above
@@ -521,6 +533,15 @@ void CUDAEnvironmentDirectedGraphBuffers::syncDevice_async(detail::CUDAScatter&
                 }
             }
         }
+        {  // Translate edge source/dest pairs and validate that they correspond to valid IDs
+            const auto& e_srcdest_b = edge_buffers.at(GRAPH_SOURCE_DEST_VARIABLE_NAME);
+            e_srcdest_b.updateHostBuffer(edge_count, stream);  // Copy back to host, before we translate device IDs
+            const unsigned int BLOCK_SZ = 512;
+            const unsigned int BLOCK_CT = static_cast<unsigned int>(ceil(edge_count / static_cast<float>(BLOCK_SZ)));
+            translateSrcDest<<<BLOCK_CT, BLOCK_SZ, 0, stream>>>(static_cast<id_t*>(e_srcdest_b.d_ptr), d_vertex_index_map, edge_count, d_pbm_swap, vertex_id_min, vertex_id_max);
+            gpuErrchkLaunch()
+        }
+
         requires_rebuild = false;
     }
 }
diff --git a/tests/test_cases/runtime/environment/test_environment_directed_graph.cu b/tests/test_cases/runtime/environment/test_environment_directed_graph.cu
index d5fcb85c1..bf29cc3dd 100644
--- a/tests/test_cases/runtime/environment/test_environment_directed_graph.cu
+++ b/tests/test_cases/runtime/environment/test_environment_directed_graph.cu
@@ -212,9 +212,9 @@ TEST(TestEnvironmentDirectedGraph, TestHostGetResetGet) {
     // Check the data persists
     model.newLayer().addHostFunction(HostCheckGraph);
     // Init graph with different known data
-    model.newLayer().addHostFunction(InitGraph3);
+    //model.newLayer().addHostFunction(InitGraph3);
     // Check the data persists
-    model.newLayer().addHostFunction(HostCheckGraph3);
+    //model.newLayer().addHostFunction(HostCheckGraph3);
 
     CUDASimulation sim(model);
 
@@ -377,8 +377,8 @@ FLAMEGPU_AGENT_FUNCTION(CopyGraphToAgent1, MessageNone, MessageNone) {
         FLAMEGPU->setVariable<double, 2>("vertex_double2", 1, graph.getVertexProperty<double, 2>("vertex_double2", FLAMEGPU->getID() - 1, 1));
         // vertex_int3, device full array access not available, so skipped
     }
-    FLAMEGPU->setVariable<id_t>("edge_source", graph.getEdgeSource(FLAMEGPU->getID() - 1));
-    FLAMEGPU->setVariable<id_t>("edge_dest", graph.getEdgeDestination(FLAMEGPU->getID() - 1));
+    FLAMEGPU->setVariable<id_t>("edge_source", graph.getVertexID(graph.getEdgeSource(FLAMEGPU->getID() - 1)));  // Method returns index, convert back to ID
+    FLAMEGPU->setVariable<id_t>("edge_dest", graph.getVertexID(graph.getEdgeDestination(FLAMEGPU->getID() - 1)));  // Method returns index, convert back to ID
     FLAMEGPU->setVariable<int>("edge_int", graph.getEdgeProperty<int>("edge_int", FLAMEGPU->getID() - 1));
     FLAMEGPU->setVariable<double, 2>("edge_double2", 0, graph.getEdgeProperty<double, 2>("edge_double2", FLAMEGPU->getID() - 1, 0));
     FLAMEGPU->setVariable<double, 2>("edge_double2", 1, graph.getEdgeProperty<double, 2>("edge_double2", FLAMEGPU->getID() - 1, 1));
@@ -396,8 +396,8 @@ FLAMEGPU_AGENT_FUNCTION(CopyGraphToAgent3, MessageNone, MessageNone) {
         FLAMEGPU->setVariable<double, 2>("vertex_double2", 1, graph.getVertexProperty<double, 2>("vertex_double2", FLAMEGPU->getID() - 1, 1));
        // vertex_int3, device full array access not available, so skipped
     }
-    FLAMEGPU->setVariable<id_t>("edge_source", graph.getEdgeSource(FLAMEGPU->getID() - 1));
-    FLAMEGPU->setVariable<id_t>("edge_dest", graph.getEdgeDestination(FLAMEGPU->getID() - 1));
+    FLAMEGPU->setVariable<id_t>("edge_source", graph.getVertexID(graph.getEdgeSource(FLAMEGPU->getID() - 1)));  // Method returns index, convert back to ID
+    FLAMEGPU->setVariable<id_t>("edge_dest", graph.getVertexID(graph.getEdgeDestination(FLAMEGPU->getID() - 1)));  // Method returns index, convert back to ID
     FLAMEGPU->setVariable<int>("edge_int", graph.getEdgeProperty<int>("edge_int", FLAMEGPU->getID() - 1));
     FLAMEGPU->setVariable<double, 2>("edge_double2", 0, graph.getEdgeProperty<double, 2>("edge_double2", FLAMEGPU->getID() - 1, 0));
     FLAMEGPU->setVariable<double, 2>("edge_double2", 1, graph.getEdgeProperty<double, 2>("edge_double2", FLAMEGPU->getID() - 1, 1));
@@ -724,14 +724,15 @@ FLAMEGPU_HOST_FUNCTION(HostTestEdgesOut) {
     }
 }
 FLAMEGPU_AGENT_FUNCTION(IterateEdgesOut, MessageNone, MessageNone) {
-    id_t src = FLAMEGPU->getIndex() + 1;
+    id_t src = FLAMEGPU->getIndex();
     unsigned int ct = 0;
     bool src_all_correct = true;
-    auto filter = FLAMEGPU->environment.getDirectedGraph("graph").outEdges(src);
+    auto graph = FLAMEGPU->environment.getDirectedGraph("graph");
+    auto filter = graph.outEdges(src);
     FLAMEGPU->setVariable<unsigned int>("count2", filter.size());
     for (auto &edge : filter) {
         src_all_correct &= edge.getProperty<id_t>("src_copy") == src;
-        FLAMEGPU->setVariable("dests", ct, edge.getEdgeDestination());
+        FLAMEGPU->setVariable("dests", ct, graph.getVertexID(edge.getEdgeDestination()));
         ++ct;
     }
     FLAMEGPU->setVariable<unsigned int>("count", ct);
@@ -825,7 +826,7 @@ TEST(TestEnvironmentDirectedGraph, TestJSONSaveLoad) {
     EXPECT_NO_THROW(sim.step());
 }
 
-TEST(TestEnvironmentDirectedGraph, DISABLED_TestEdgesOut) {
+TEST(TestEnvironmentDirectedGraph, TestEdgesOut) {
     ModelDescription model("GraphTest");
     EnvironmentDirectedGraphDescription graph = model.Environment().newDirectedGraph("graph");
 
@@ -862,7 +863,7 @@ TEST(TestEnvironmentDirectedGraph, DISABLED_TestEdgesOut) {
         ++k;
     }
 }
-TEST(TestEnvironmentDirectedGraph, DISABLED_TestEdgesIn) {
+TEST(TestEnvironmentDirectedGraph, TestEdgesIn) {
     ModelDescription model("GraphTest");
     EnvironmentDirectedGraphDescription graph = model.Environment().newDirectedGraph("graph");
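Note on the scheme this patch implements: edge source/dest pairs keep user-facing vertex IDs on the host, and are translated once per rebuild into dense vertex indices for the device (translateSrcDest above, driven by the d_vertex_index_map built from buildIDMap and offset by vertex_id_min), so CSR/CSC construction and device traversal work entirely in index space, with getVertexID() converting back where tests expect IDs. The following is a minimal standalone sketch of just that translation step, assuming a dense map covering the ID range from id_min upward; it is not FLAMEGPU code, and the names (translate, idMap) are illustrative only:

// Sketch of ID -> index translation for an edge list (illustrative, not FLAMEGPU API).
#include <cstdio>
#include <vector>

// idMap[id - id_min] holds the dense vertex index for that ID
// (mirrors the role of d_vertex_index_map / vertex_id_min above).
__global__ void translate(unsigned int *srcdest, const unsigned int *idMap,
                          const unsigned int edge_count, const unsigned int id_min) {
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < edge_count) {
        srcdest[i * 2 + 0] = idMap[srcdest[i * 2 + 0] - id_min];
        srcdest[i * 2 + 1] = idMap[srcdest[i * 2 + 1] - id_min];
    }
}

int main() {
    // Vertices with sparse, non-zero-based IDs {10, 12, 15} -> indices {0, 1, 2}.
    const unsigned int id_min = 10;
    const unsigned int UNUSED = 0xffffffffu;  // sentinel for gaps in the ID range
    std::vector<unsigned int> idMap = {0, UNUSED, 1, UNUSED, UNUSED, 2};
    // Two edges expressed as ID pairs: 10->15 and 15->12.
    std::vector<unsigned int> edges = {10, 15, 15, 12};

    unsigned int *d_idMap, *d_edges;
    cudaMalloc(&d_idMap, idMap.size() * sizeof(unsigned int));
    cudaMalloc(&d_edges, edges.size() * sizeof(unsigned int));
    cudaMemcpy(d_idMap, idMap.data(), idMap.size() * sizeof(unsigned int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_edges, edges.data(), edges.size() * sizeof(unsigned int), cudaMemcpyHostToDevice);

    translate<<<1, 32>>>(d_edges, d_idMap, 2, id_min);
    cudaMemcpy(edges.data(), d_edges, edges.size() * sizeof(unsigned int), cudaMemcpyDeviceToHost);
    printf("edges by index: %u->%u, %u->%u\n", edges[0], edges[1], edges[2], edges[3]);  // 0->2, 2->1
    cudaFree(d_idMap);
    cudaFree(d_edges);
    return 0;
}

Translating up front means the per-edge sort keys and the PBM bucketing are already in index space, so traversal never pays a map lookup per edge access; the trade-off, visible in the updateHostBuffer call above, is that the host copy must be taken before IDs are overwritten with indices.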
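The fillKVPairs comment "To subsort by destination too, we treat the pair of uint32 as a uint64" describes the sort-key packing: each edge's two 32-bit words are radix-sorted as one 64-bit key, giving source-major, destination-minor order in a single pass. Below is a standalone sketch of that trick using the same two-phase cub::DeviceRadixSort::SortPairs pattern as the patch (size query, then sort); it assumes a little-endian layout, where element [i*2+0] (the destination word, as in fillKVPairs) becomes the low half of the uint64, and the edge data is made up for illustration:

// Sketch of the packed uint64 key sort (illustrative, not FLAMEGPU code).
#include <cstdint>
#include <cstdio>
#include <cub/cub.cuh>

int main() {
    // Three edges as (src, dest) index pairs: (1,0), (0,2), (1,1).
    // [i*2+0] = dest (low word), [i*2+1] = src (high word), as in fillKVPairs.
    const unsigned int n = 3;
    uint32_t h_keys[n * 2] = {0, 1,   // edge 0: dest 0, src 1
                              2, 0,   // edge 1: dest 2, src 0
                              1, 1};  // edge 2: dest 1, src 1
    unsigned int h_vals[n] = {0, 1, 2};  // original edge indices

    uint64_t *d_keys_in, *d_keys_out;
    unsigned int *d_vals_in, *d_vals_out;
    cudaMalloc(&d_keys_in, n * sizeof(uint64_t));
    cudaMalloc(&d_keys_out, n * sizeof(uint64_t));
    cudaMalloc(&d_vals_in, n * sizeof(unsigned int));
    cudaMalloc(&d_vals_out, n * sizeof(unsigned int));
    cudaMemcpy(d_keys_in, h_keys, sizeof(h_keys), cudaMemcpyHostToDevice);
    cudaMemcpy(d_vals_in, h_vals, sizeof(h_vals), cudaMemcpyHostToDevice);

    // Two-phase CUB pattern, as in the patch: first call only sizes temp storage.
    void *d_temp = nullptr;
    size_t temp_bytes = 0;
    cub::DeviceRadixSort::SortPairs(d_temp, temp_bytes, d_keys_in, d_keys_out, d_vals_in, d_vals_out, n, 0, sizeof(uint64_t) * 8);
    cudaMalloc(&d_temp, temp_bytes);
    cub::DeviceRadixSort::SortPairs(d_temp, temp_bytes, d_keys_in, d_keys_out, d_vals_in, d_vals_out, n, 0, sizeof(uint64_t) * 8);

    cudaMemcpy(h_vals, d_vals_out, sizeof(h_vals), cudaMemcpyDeviceToHost);
    printf("edge order after sort: %u %u %u\n", h_vals[0], h_vals[1], h_vals[2]);  // expect: 1 0 2
    cudaFree(d_keys_in); cudaFree(d_keys_out); cudaFree(d_vals_in); cudaFree(d_vals_out); cudaFree(d_temp);
    return 0;
}

The sorted values are the original edge indices, which is what scatterPosition_async then uses to reorder every edge property buffer; fillKVPairs_inverted simply swaps the two words so the identical sort yields destination-major order for the CSC.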