diff --git a/configs/ipv4-router.click b/configs/ipv4-router.click
index 23ae469..1f1b7aa 100644
--- a/configs/ipv4-router.click
+++ b/configs/ipv4-router.click
@@ -1,9 +1,7 @@
 FromInput() ->
-// LoadBalanceAdaptiveMeasure() ->
-LoadBalanceThruput() ->
 DropBroadcasts() ->
 CheckIPHeader() ->
-GPUOnly() ->
+LoadBalanceAdaptiveMeasure() ->
 IPlookup() ->
 DecIPTTL() ->
 ToOutput();
diff --git a/configs/ipv6-router.click b/configs/ipv6-router.click
index af847d4..f688327 100644
--- a/configs/ipv6-router.click
+++ b/configs/ipv6-router.click
@@ -1,7 +1,6 @@
 FromInput() ->
-// LoadBalanceAdaptiveMeasure() ->
-LoadBalanceThruput() ->
 CheckIP6Header() ->
+LoadBalanceAdaptiveMeasure() ->
 LookupIP6Route() ->
 DecIP6HLIM() ->
 DropBroadcasts() ->
diff --git a/elements/loadbalancers/LoadBalanceAdaptiveMeasure.hh b/elements/loadbalancers/LoadBalanceAdaptiveMeasure.hh
index 54f58d5..dabac51 100644
--- a/elements/loadbalancers/LoadBalanceAdaptiveMeasure.hh
+++ b/elements/loadbalancers/LoadBalanceAdaptiveMeasure.hh
@@ -15,9 +15,9 @@
 #include
 #include
 
-#define _LB_MEASURE_PPC_MY_CPU_TIME (1000)
-#define _LB_MEASURE_PPC_MY_CPU_DELTA (50)
-#define _LB_MEASURE_PPC_REPEAT_PER_RATIO (32)
+#define LB_MEASURE_CPU_RATIO_MULTIPLIER (1000)
+#define LB_MEASURE_CPU_RATIO_DELTA (50)
+#define LB_MEASURE_REPTITON_PER_RATIO (8)
 
 namespace nba {
@@ -34,7 +34,7 @@ public:
     int get_type() const { return SchedulableElement::get_type() | PerBatchElement::get_type(); }
 
     int initialize() {
-        uniform_dist = std::uniform_int_distribution(0, _LB_MEASURE_PPC_MY_CPU_TIME);
+        uniform_dist = std::uniform_int_distribution(0, LB_MEASURE_CPU_RATIO_MULTIPLIER);
         random_generator = std::default_random_engine();
 
         /* We have only two ranges for CPU and GPU. */
@@ -68,29 +68,27 @@ public:
         int64_t temp_cpu_ratio = rte_atomic64_read(&cpu_ratio);
         local_cpu_ratio = temp_cpu_ratio;
 
-        if (ctx->io_ctx->loc.local_thread_idx == 0) {
+        //if (ctx->io_ctx->loc.local_thread_idx == 0) {
+        if (ctx->io_ctx->loc.core_id == 0) {
             double cpu_ppc = ctx->inspector->pkt_proc_cycles[0];
             double gpu_ppc = ctx->inspector->pkt_proc_cycles[1];
+            double estimated_ppc = (temp_cpu_ratio * cpu_ppc
+                                    + (LB_MEASURE_CPU_RATIO_MULTIPLIER - temp_cpu_ratio) * gpu_ppc)
+                                   / LB_MEASURE_CPU_RATIO_MULTIPLIER;
 
-            printf("[MEASURE] CPU[%f] GPU[%f] Ratio[%f]\n",
-                   cpu_ppc, gpu_ppc, ((double)temp_cpu_ratio)/_LB_MEASURE_PPC_MY_CPU_TIME);
+            printf("[MEASURE:%d] CPU %12f GPU %12f PPC %12f Ratio %.3f\n", ctx->loc.node_id,
+                   cpu_ppc, gpu_ppc, estimated_ppc, ((double)temp_cpu_ratio) / LB_MEASURE_CPU_RATIO_MULTIPLIER);
 
-            if (print_count++ % _LB_MEASURE_PPC_REPEAT_PER_RATIO == 0)
+            if ((print_count++) % LB_MEASURE_REPTITON_PER_RATIO == 0)
             {
-                printf("OLD_RATIO[%f]\n", ((double)temp_cpu_ratio)/_LB_MEASURE_PPC_MY_CPU_TIME);
-                temp_cpu_ratio += _LB_MEASURE_PPC_MY_CPU_DELTA;
-
-                if(temp_cpu_ratio > _LB_MEASURE_PPC_MY_CPU_TIME-_LB_MEASURE_PPC_MY_CPU_DELTA)
+                temp_cpu_ratio += LB_MEASURE_CPU_RATIO_DELTA;
+                if (temp_cpu_ratio > LB_MEASURE_CPU_RATIO_MULTIPLIER - LB_MEASURE_CPU_RATIO_DELTA)
                 {
-                    temp_cpu_ratio = _LB_MEASURE_PPC_MY_CPU_TIME-_LB_MEASURE_PPC_MY_CPU_DELTA;
+                    temp_cpu_ratio = LB_MEASURE_CPU_RATIO_MULTIPLIER - LB_MEASURE_CPU_RATIO_DELTA;
                     printf("END_OF_TEST\n");
                     raise(SIGINT);
                 }
-                if(temp_cpu_ratio < _LB_MEASURE_PPC_MY_CPU_DELTA)
-                    temp_cpu_ratio = _LB_MEASURE_PPC_MY_CPU_DELTA;
-
                 rte_atomic64_set(&cpu_ratio, temp_cpu_ratio);
-                printf("NEW_RATIO[%f]\n", ((double)temp_cpu_ratio)/_LB_MEASURE_PPC_MY_CPU_TIME);
             }
         }
diff --git a/lib/elementgraph.cc b/lib/elementgraph.cc
index 13b9665..e3c7f32 100644
--- a/lib/elementgraph.cc
+++ b/lib/elementgraph.cc
@@ -117,7 +117,7 @@ void ElementGraph::flush_delayed_batches()
         PacketBatch *batch = delayed_batches.front();
         delayed_batches.pop_front();
         if (batch->delay_start > 0) {
-            batch->compute_time += (rte_rdtsc() - batch->delay_start);
+            batch->compute_time += (rdtscp() - batch->delay_start);
             batch->delay_start = 0;
         }
@@ -193,8 +193,7 @@ void ElementGraph::run(PacketBatch *batch, Element *start_elem, int input_port)
             if (!ctx->io_ctx->loop_broken)
                 ev_run(ctx->io_ctx->loop, EVRUN_NOWAIT);
             /* Keep the current batch for later processing. */
-            assert(batch->delay_start == 0);
-            batch->delay_start = rte_rdtsc();
+            batch->delay_start = rdtscp();
             delayed_batches.push_back(batch);
             continue;
         }
@@ -225,7 +224,7 @@ void ElementGraph::run(PacketBatch *batch, Element *start_elem, int input_port)
             /* We have no room for batch in the preparing task.
              * Keep the current batch for later processing. */
             assert(batch->delay_start == 0);
-            batch->delay_start = rte_rdtsc();
+            batch->delay_start = rdtscp();
             delayed_batches.push_back(batch);
             continue;
         }
@@ -271,6 +270,7 @@ void ElementGraph::run(PacketBatch *batch, Element *start_elem, int input_port)
             batch_disposition = current_elem->_process_batch(input_port, batch);
         }
     }
+    lb_decision = anno_get(&batch->banno, NBA_BANNO_LB_DECISION);
 
     /* If the element was per-batch and it said it will keep the batch,
      * we do not have to perform batch-split operations below. */
diff --git a/lib/io.cc b/lib/io.cc
index 1a51bba..2edaf7b 100644
--- a/lib/io.cc
+++ b/lib/io.cc
@@ -279,7 +279,7 @@ static void comp_process_batch(io_thread_context *ctx, void *pkts, size_t count,
      * "start-of-processing" timestamp.
      * However its ordering is same as we do FIFO here. */
-    uint64_t t = rte_rdtsc();
+    uint64_t t = rdtscp();
     batch->count = count;
     batch->recv_timestamp = t;
     batch->batch_id = recv_batch_cnt;
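
Note: the LoadBalanceAdaptiveMeasure hunk above sweeps the CPU share of traffic in LB_MEASURE_CPU_RATIO_DELTA steps and, at each step, logs a weighted per-packet-cycle (PPC) estimate before raising SIGINT at the end of the sweep. The standalone C++ sketch below reproduces only that arithmetic; the constant names and sample PPC values are stand-ins for the element's macros and inspector counters, not code taken from the patch.

    // Standalone sketch (assumption: mirrors the estimated_ppc arithmetic and
    // ratio sweep from the patch, without the NBA/DPDK types such as
    // rte_atomic64_t or comp_thread_context).
    #include <cstdint>
    #include <cstdio>

    static const int64_t RATIO_MULTIPLIER = 1000;  // plays the role of LB_MEASURE_CPU_RATIO_MULTIPLIER
    static const int64_t RATIO_DELTA      = 50;    // plays the role of LB_MEASURE_CPU_RATIO_DELTA

    int main() {
        // Sweep the CPU share from 0 up to (MULTIPLIER - DELTA) in DELTA steps,
        // as the patched element does each time its print counter completes a cycle.
        for (int64_t cpu_ratio = 0;
             cpu_ratio <= RATIO_MULTIPLIER - RATIO_DELTA;
             cpu_ratio += RATIO_DELTA) {
            // Hypothetical per-packet-cycle measurements for the CPU and GPU paths;
            // in the element these come from ctx->inspector->pkt_proc_cycles[0..1].
            double cpu_ppc = 1200.0;
            double gpu_ppc = 800.0;
            // Weighted average: expected cycles per packet when a cpu_ratio/MULTIPLIER
            // fraction of packets stays on the CPU and the remainder is offloaded.
            double estimated_ppc = (cpu_ratio * cpu_ppc
                                    + (RATIO_MULTIPLIER - cpu_ratio) * gpu_ppc)
                                   / RATIO_MULTIPLIER;
            printf("ratio %.3f -> estimated PPC %.1f\n",
                   (double) cpu_ratio / RATIO_MULTIPLIER, estimated_ppc);
        }
        return 0;
    }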