diff --git a/configs/ipv4-router.click b/configs/ipv4-router.click
index 23ae469..1f1b7aa 100644
--- a/configs/ipv4-router.click
+++ b/configs/ipv4-router.click
@@ -1,9 +1,7 @@
 FromInput() ->
-// LoadBalanceAdaptiveMeasure() ->
-LoadBalanceThruput() ->
 DropBroadcasts() ->
 CheckIPHeader() ->
-GPUOnly() ->
+LoadBalanceAdaptiveMeasure() ->
 IPlookup() ->
 DecIPTTL() ->
 ToOutput();
diff --git a/configs/ipv6-router.click b/configs/ipv6-router.click
index af847d4..f688327 100644
--- a/configs/ipv6-router.click
+++ b/configs/ipv6-router.click
@@ -1,7 +1,6 @@
 FromInput() ->
-// LoadBalanceAdaptiveMeasure() ->
-LoadBalanceThruput() ->
 CheckIP6Header() ->
+LoadBalanceAdaptiveMeasure() ->
 LookupIP6Route() ->
 DecIP6HLIM() ->
 DropBroadcasts() ->
diff --git a/elements/loadbalancers/LoadBalanceAdaptiveMeasure.hh b/elements/loadbalancers/LoadBalanceAdaptiveMeasure.hh
index 54f58d5..dabac51 100644
--- a/elements/loadbalancers/LoadBalanceAdaptiveMeasure.hh
+++ b/elements/loadbalancers/LoadBalanceAdaptiveMeasure.hh
@@ -15,9 +15,9 @@
 #include
 #include
 
-#define _LB_MEASURE_PPC_MY_CPU_TIME (1000)
-#define _LB_MEASURE_PPC_MY_CPU_DELTA (50)
-#define _LB_MEASURE_PPC_REPEAT_PER_RATIO (32)
+#define LB_MEASURE_CPU_RATIO_MULTIPLIER (1000)
+#define LB_MEASURE_CPU_RATIO_DELTA (50)
+#define LB_MEASURE_REPTITON_PER_RATIO (8)
 
 namespace nba {
@@ -34,7 +34,7 @@ public:
     int get_type() const { return SchedulableElement::get_type() | PerBatchElement::get_type(); }
 
     int initialize() {
-        uniform_dist = std::uniform_int_distribution(0, _LB_MEASURE_PPC_MY_CPU_TIME);
+        uniform_dist = std::uniform_int_distribution(0, LB_MEASURE_CPU_RATIO_MULTIPLIER);
         random_generator = std::default_random_engine();
 
         /* We have only two ranges for CPU and GPU. */
@@ -68,29 +68,27 @@ public:
         int64_t temp_cpu_ratio = rte_atomic64_read(&cpu_ratio);
         local_cpu_ratio = temp_cpu_ratio;
 
-        if (ctx->io_ctx->loc.local_thread_idx == 0) {
+        //if (ctx->io_ctx->loc.local_thread_idx == 0) {
+        if (ctx->io_ctx->loc.core_id == 0) {
             double cpu_ppc = ctx->inspector->pkt_proc_cycles[0];
             double gpu_ppc = ctx->inspector->pkt_proc_cycles[1];
+            double estimated_ppc = (temp_cpu_ratio * cpu_ppc
+                                    + (LB_MEASURE_CPU_RATIO_MULTIPLIER - temp_cpu_ratio) * gpu_ppc)
+                                   / LB_MEASURE_CPU_RATIO_MULTIPLIER;
 
-            printf("[MEASURE] CPU[%f] GPU[%f] Ratio[%f]\n",
-                   cpu_ppc, gpu_ppc, ((double)temp_cpu_ratio)/_LB_MEASURE_PPC_MY_CPU_TIME);
+            printf("[MEASURE:%d] CPU %12f GPU %12f PPC %12f Ratio %.3f\n", ctx->loc.node_id,
+                   cpu_ppc, gpu_ppc, estimated_ppc, ((double)temp_cpu_ratio) / LB_MEASURE_CPU_RATIO_MULTIPLIER);
 
-            if (print_count++ % _LB_MEASURE_PPC_REPEAT_PER_RATIO == 0)
+            if ((print_count++) % LB_MEASURE_REPTITON_PER_RATIO == 0)
             {
-                printf("OLD_RATIO[%f]\n", ((double)temp_cpu_ratio)/_LB_MEASURE_PPC_MY_CPU_TIME);
-                temp_cpu_ratio += _LB_MEASURE_PPC_MY_CPU_DELTA;
-
-                if(temp_cpu_ratio > _LB_MEASURE_PPC_MY_CPU_TIME-_LB_MEASURE_PPC_MY_CPU_DELTA)
+                temp_cpu_ratio += LB_MEASURE_CPU_RATIO_DELTA;
+                if (temp_cpu_ratio > LB_MEASURE_CPU_RATIO_MULTIPLIER - LB_MEASURE_CPU_RATIO_DELTA)
                 {
-                    temp_cpu_ratio = _LB_MEASURE_PPC_MY_CPU_TIME-_LB_MEASURE_PPC_MY_CPU_DELTA;
+                    temp_cpu_ratio = LB_MEASURE_CPU_RATIO_MULTIPLIER - LB_MEASURE_CPU_RATIO_DELTA;
                     printf("END_OF_TEST\n");
                     raise(SIGINT);
                 }
-                if(temp_cpu_ratio < _LB_MEASURE_PPC_MY_CPU_DELTA)
-                    temp_cpu_ratio = _LB_MEASURE_PPC_MY_CPU_DELTA;
-
                 rte_atomic64_set(&cpu_ratio, temp_cpu_ratio);
-                printf("NEW_RATIO[%f]\n", ((double)temp_cpu_ratio)/_LB_MEASURE_PPC_MY_CPU_TIME);
             }
         }
diff --git a/lib/elementgraph.cc b/lib/elementgraph.cc
index 13b9665..e3c7f32 100644
--- a/lib/elementgraph.cc
+++ b/lib/elementgraph.cc
@@ -117,7 +117,7 @@ void ElementGraph::flush_delayed_batches()
         PacketBatch *batch = delayed_batches.front();
         delayed_batches.pop_front();
         if (batch->delay_start > 0) {
-            batch->compute_time += (rte_rdtsc() - batch->delay_start);
+            batch->compute_time += (rdtscp() - batch->delay_start);
             batch->delay_start = 0;
         }
@@ -193,8 +193,7 @@ void ElementGraph::run(PacketBatch *batch, Element *start_elem, int input_port)
             if (!ctx->io_ctx->loop_broken)
                 ev_run(ctx->io_ctx->loop, EVRUN_NOWAIT);
             /* Keep the current batch for later processing. */
-            assert(batch->delay_start == 0);
-            batch->delay_start = rte_rdtsc();
+            batch->delay_start = rdtscp();
             delayed_batches.push_back(batch);
             continue;
         }
@@ -225,7 +224,7 @@ void ElementGraph::run(PacketBatch *batch, Element *start_elem, int input_port)
             /* We have no room for batch in the preparing task.
              * Keep the current batch for later processing. */
             assert(batch->delay_start == 0);
-            batch->delay_start = rte_rdtsc();
+            batch->delay_start = rdtscp();
             delayed_batches.push_back(batch);
             continue;
         }
@@ -271,6 +270,7 @@ void ElementGraph::run(PacketBatch *batch, Element *start_elem, int input_port)
             batch_disposition = current_elem->_process_batch(input_port, batch);
         }
     }
+    lb_decision = anno_get(&batch->banno, NBA_BANNO_LB_DECISION);
 
     /* If the element was per-batch and it said it will keep the batch,
      * we do not have to perform batch-split operations below. */
diff --git a/lib/io.cc b/lib/io.cc
index 1a51bba..2edaf7b 100644
--- a/lib/io.cc
+++ b/lib/io.cc
@@ -279,7 +279,7 @@ static void comp_process_batch(io_thread_context *ctx, void *pkts, size_t count,
      * "start-of-processing" timestamp.
      * However its ordering is same as we do FIFO here. */
-    uint64_t t = rte_rdtsc();
+    uint64_t t = rdtscp();
     batch->count = count;
     batch->recv_timestamp = t;
     batch->batch_id = recv_batch_cnt;
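
Note: the LoadBalanceAdaptiveMeasure hunk above sweeps the CPU share of traffic in LB_MEASURE_CPU_RATIO_DELTA steps and, at each step, logs a weighted per-packet-cycle (PPC) estimate before raising SIGINT at the end of the sweep. The standalone C++ sketch below reproduces only that arithmetic; the constant names and sample PPC values are stand-ins for the element's macros and inspector counters, not code taken from the patch.

    // Standalone sketch (assumption: mirrors the estimated_ppc arithmetic and
    // ratio sweep from the patch, without the NBA/DPDK types such as
    // rte_atomic64_t or comp_thread_context).
    #include <cstdint>
    #include <cstdio>

    static const int64_t RATIO_MULTIPLIER = 1000;  // plays the role of LB_MEASURE_CPU_RATIO_MULTIPLIER
    static const int64_t RATIO_DELTA      = 50;    // plays the role of LB_MEASURE_CPU_RATIO_DELTA

    int main() {
        // Sweep the CPU share from 0 up to (MULTIPLIER - DELTA) in DELTA steps,
        // as the patched element does each time its print counter completes a cycle.
        for (int64_t cpu_ratio = 0;
             cpu_ratio <= RATIO_MULTIPLIER - RATIO_DELTA;
             cpu_ratio += RATIO_DELTA) {
            // Hypothetical per-packet-cycle measurements for the CPU and GPU paths;
            // in the element these come from ctx->inspector->pkt_proc_cycles[0..1].
            double cpu_ppc = 1200.0;
            double gpu_ppc = 800.0;
            // Weighted average: expected cycles per packet when a cpu_ratio/MULTIPLIER
            // fraction of packets stays on the CPU and the remainder is offloaded.
            double estimated_ppc = (cpu_ratio * cpu_ppc
                                    + (RATIO_MULTIPLIER - cpu_ratio) * gpu_ppc)
                                   / RATIO_MULTIPLIER;
            printf("ratio %.3f -> estimated PPC %.1f\n",
                   (double) cpu_ratio / RATIO_MULTIPLIER, estimated_ppc);
        }
        return 0;
    }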