diff --git a/README.md b/README.md
index 43bbc24..896ffc9 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Make sure you have it installed in your leader host.
 2. Ensure that you have [Julia](https://julialang.org/downloads/platform/) installed before running the setup script, as the default solver is implemented in Julia (Our solver has been tested with `julia version 1.9.3`). Julia's solver offers superior performance, particularly for larger workflows or a higher number of offloading hosts.
 
-Nonetheless, you retain the flexibility to switch to the Gekko solver by modifyiing the value of the `solver` field within the [rankerConfig.ini](https://github.com/ubc-cirrus-lab/unfaasener/blob/main/scheduler/rankerConfig.ini) (set `solver = gekko` to use the Gekko solver and `solver = julia` to use the Julia solver).
+Nonetheless, you retain the flexibility to switch to the Gekko solver by modifying the value of the `solver` field within the [rankerConfig.ini](https://github.com/ubc-cirrus-lab/unfaasener/blob/main/scheduler/rankerConfig.ini) (set `solver = gekko` to use the Gekko solver and `solver = julia` to use the Julia solver).
 
 3. In case of leader failure, the leaderFailure variable in the setup.sh needs to be set to 1 (```leaderFailure=1```) in order to retrieve host logs from the datastore.
@@ -38,7 +38,7 @@ Then, run the following script to build those components that need to be compiled.
 ## Deploying the System
 To deploy the system, follow these steps:
-1. **Giving the required credentials:** To create the necassary credentials for using UnFaaSener, follow the instructions [here](./scheduler/key/).
+1. **Providing the required credentials:** To create the necessary credentials for using UnFaaSener, follow the instructions [here](./scheduler/key/).
 2. **Adding a new host:** To register a new host to UnFaaSener, you must assign a new Pub/Sub topic to the host and ensure that the host subscribes to this topic. To do this in Google Cloud Functions, follow these steps:
     1. **Create a Pub/Sub Topic:**
         * Access the Pub/Sub service page on the Google Cloud Console.
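The README.md hunk above has users flip the `solver` field in rankerConfig.ini by hand. A minimal sketch of making the same switch programmatically with Python's configparser; the `[settings]` section name is an assumption, so check the actual header used in rankerConfig.ini first:

```python
# Sketch only: switch the scheduler between the Gekko and Julia solvers.
# Assumption: the `solver` key lives under a [settings] section; adjust the
# section name to whatever rankerConfig.ini actually uses.
import configparser

config = configparser.ConfigParser()
config.read("scheduler/rankerConfig.ini")
config["settings"]["solver"] = "julia"  # or "gekko"
with open("scheduler/rankerConfig.ini", "w") as f:
    config.write(f)
```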
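The host-registration steps above create the topic through the Cloud Console. For completeness, a hedged sketch of the same two steps with the google-cloud-pubsub client library; the project ID and resource names below are placeholders, not values taken from the UnFaaSener setup:

```python
# Sketch only: create a Pub/Sub topic for a new host, plus a subscription
# the host's execution agent can listen on. Replace the placeholder names.
from google.cloud import pubsub_v1

project_id = "your-gcp-project"  # placeholder: your own GCP project ID
publisher = pubsub_v1.PublisherClient()
subscriber = pubsub_v1.SubscriberClient()

topic_path = publisher.topic_path(project_id, "hostSubscriber1")
publisher.create_topic(request={"name": topic_path})

subscription_path = subscriber.subscription_path(project_id, "hostSubscriber1-sub")
subscriber.create_subscription(
    request={"name": subscription_path, "topic": topic_path}
)
```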
diff --git a/host-agents/execution-agent/README b/host-agents/execution-agent/README
index 47aebb5..6fb14b4 100644
--- a/host-agents/execution-agent/README
+++ b/host-agents/execution-agent/README
@@ -1,2 +1,2 @@
-- python3 vmModule.pyi vm_topic_name vm_name ==> to listen on the pub-sub topic for the VM vm_name
-e.g. python3 vmModule.py vmSubscriber1 vm1
+- python3 vmModule.py host_topic_name host_name ==> to listen on the pub-sub topic for the host host_name
+e.g. python3 vmModule.py hostSubscriber1 host1
diff --git a/host-agents/execution-agent/vmModule.py b/host-agents/execution-agent/vmModule.py
index a15effe..e63fea2 100644
--- a/host-agents/execution-agent/vmModule.py
+++ b/host-agents/execution-agent/vmModule.py
@@ -582,7 +582,7 @@ def processReqs(jsonfile, before):
     # This part allows reuse of existing containers , but impacts the usability of the system at high RequestPerSecond
     # It is disabled to enable the system to create more containers as more requests arrive
     # These containers are then stopped by the thread
-    # TO -renable it , just remove the line what sets conts = {}
+    # To re-enable it, just remove the line that sets conts = {}
     # conts = {} #THis line can be removed to allow reusing containers
     execution_complete = 0
     localcontsPerFunct = contsPerFunct.copy()
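The comment block patched in vmModule.py describes a disabled container-reuse path: keeping containers warm lowers startup cost, while clearing the map forces a fresh container per request so the system scales out at high request rates. A rough, hypothetical sketch of that trade-off, with invented names rather than the module's actual structures:

```python
# Hypothetical illustration of per-function container reuse; `conts` maps a
# function name to a warm container. vmModule.py's real bookkeeping differs.
conts = {}

def acquire_container(func_name, start_container):
    # Reuse a warm container when one exists; otherwise start a new one.
    if func_name not in conts:
        conts[func_name] = start_container(func_name)
    return conts[func_name]

# Disabling reuse, as the patched comment describes, amounts to resetting
# the map before each batch of requests:
# conts = {}
```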
diff --git a/host-agents/monitoring-agent/agent.cpp b/host-agents/monitoring-agent/agent.cpp
index 94600ea..aca8a76 100644
--- a/host-agents/monitoring-agent/agent.cpp
+++ b/host-agents/monitoring-agent/agent.cpp
@@ -227,18 +227,18 @@ int result = sched_setaffinity(0, sizeof(mask), &mask);
     size_t mem_violation;
     if (initialFlag == 1){
         if (PREDICTOR_CLASS=="EMA"){
-            auto cpu_result = cpu_predictor.compute_predicton_ExponentialMovingAverage((cpu_pred_old),0, 1);
+            auto cpu_result = cpu_predictor.compute_prediction_ExponentialMovingAverage((cpu_pred_old),0, 1);
             cpu_pred_old = cpu_result.prediction;
             cpu_violation = cpu_result.violation;
-            auto mem_result = mem_predictor.compute_predicton_ExponentialMovingAverage((mem_pred_old),1, 1);
+            auto mem_result = mem_predictor.compute_prediction_ExponentialMovingAverage((mem_pred_old),1, 1);
             mem_pred_old = mem_result.prediction;
             mem_violation = mem_result.violation;
         }
         else if (PREDICTOR_CLASS=="MC"){
-            auto cpu_result = cpu_predictor.compute_predicton_MarkovChain((cpu_pred_old),0, 1);
+            auto cpu_result = cpu_predictor.compute_prediction_MarkovChain((cpu_pred_old),0, 1);
             cpu_pred_old = cpu_result.prediction;
             cpu_violation = cpu_result.violation;
-            auto mem_result = mem_predictor.compute_predicton_MarkovChain((mem_pred_old),1, 1);
+            auto mem_result = mem_predictor.compute_prediction_MarkovChain((mem_pred_old),1, 1);
             mem_pred_old = mem_result.prediction;
             mem_violation = mem_result.violation;
         }
@@ -248,18 +248,18 @@ int result = sched_setaffinity(0, sizeof(mask), &mask);
     else {
         if (PREDICTOR_CLASS=="EMA"){
-            auto cpu_result = cpu_predictor.compute_predicton_ExponentialMovingAverage((cpu_pred_old),0, 0);
+            auto cpu_result = cpu_predictor.compute_prediction_ExponentialMovingAverage((cpu_pred_old),0, 0);
             cpu_pred_old = cpu_result.prediction;
             cpu_violation = cpu_result.violation;
-            auto mem_result = mem_predictor.compute_predicton_ExponentialMovingAverage((mem_pred_old),1, 0);
+            auto mem_result = mem_predictor.compute_prediction_ExponentialMovingAverage((mem_pred_old),1, 0);
             mem_pred_old = mem_result.prediction;
             mem_violation = mem_result.violation;
         }
         else if (PREDICTOR_CLASS=="MC"){
-            auto cpu_result = cpu_predictor.compute_predicton_MarkovChain((cpu_pred_old),0, 0);
+            auto cpu_result = cpu_predictor.compute_prediction_MarkovChain((cpu_pred_old),0, 0);
             cpu_pred_old = cpu_result.prediction;
             cpu_violation = cpu_result.violation;
-            auto mem_result = mem_predictor.compute_predicton_MarkovChain((mem_pred_old),1, 0);
+            auto mem_result = mem_predictor.compute_prediction_MarkovChain((mem_pred_old),1, 0);
             mem_pred_old = mem_result.prediction;
             mem_violation = mem_result.violation;
         }
@@ -395,7 +395,7 @@ int result = sched_setaffinity(0, sizeof(mask), &mask);
         }
     }
-    // update docker utilixation
+    // update docker utilization
     prev_docker_utilization_cores_used = docker_cores_used;
     prev_docker_utilization = docker_cpusum;
diff --git a/host-agents/monitoring-agent/predictor.h b/host-agents/monitoring-agent/predictor.h
index 7089784..aebefd6 100644
--- a/host-agents/monitoring-agent/predictor.h
+++ b/host-agents/monitoring-agent/predictor.h
@@ -21,7 +21,7 @@ class predictor
     double ema_margin = 0.2;
     double violation_margin = 0.2;
     int size;
-    // tje following variables are for the Markiv Chain predictor
+    // the following variables are for the Markov Chain predictor
     double mc_margin = 0.05;
     double mc_res_margin = 1;
     double mc_util_res = 4; // 4%
@@ -44,7 +44,7 @@ class predictor
         }
     }
 
-    auto compute_predicton_ExponentialMovingAverage(double old_value, int type, int initialFlag)
+    auto compute_prediction_ExponentialMovingAverage(double old_value, int type, int initialFlag)
     {
         // type: 0 -> cpu, 1 -> memory
         size_t violation = 0;
@@ -75,7 +75,7 @@ class predictor
         return result {prediction, violation};
     }
 
-    auto compute_predicton_MarkovChain(double old_value, int type, int initialFlag)
+    auto compute_prediction_MarkovChain(double old_value, int type, int initialFlag)
    {
         // type: 0 -> cpu, 1 -> memory
         size_t violation = 0;
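The renamed compute_prediction_ExponentialMovingAverage keeps a running estimate of CPU or memory utilization and flags a violation when a sample overshoots the margin-padded prediction (the ema_margin / violation_margin members above). An illustrative Python rendering of that idea, with placeholder constants rather than predictor.h's exact logic:

```python
# Illustrative EMA predictor: blend each new utilization sample into the
# running prediction; flag a violation when the sample exceeds the padded
# prediction. Constants are placeholders, not predictor.h's values.
def ema_predict(old_pred, sample, alpha=0.3, violation_margin=0.2):
    prediction = alpha * sample + (1 - alpha) * old_pred
    violation = 1 if sample > prediction * (1 + violation_margin) else 0
    return prediction, violation

pred, viol = ema_predict(old_pred=10.0, sample=25.0)
print(pred, viol)  # 14.5 1 -> the spike exceeded the padded prediction
```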
diff --git a/log-parser/get-workflow-logs/getWorkflowLogs.py b/log-parser/get-workflow-logs/getWorkflowLogs.py
index 6d760da..d146aa8 100644
--- a/log-parser/get-workflow-logs/getWorkflowLogs.py
+++ b/log-parser/get-workflow-logs/getWorkflowLogs.py
@@ -29,7 +29,7 @@ def __init__(self, workflow):
 
 if __name__ == "__main__":
-    interuptTime = 60
+    interruptTime = 60
     initial = int(sys.argv[2])
     path = (
         str(Path(os.path.dirname(os.path.abspath(__file__))).resolve().parents[1])
@@ -51,7 +51,7 @@ def __init__(self, workflow):
         print("--- %s seconds ---" % (time.time() - start_time))
     else:
         print("---------getting new logs:---------------")
-        time.sleep(interuptTime)
+        time.sleep(interruptTime)
         while True:
             start_time = time.time()
             workflow = sys.argv[1]
@@ -66,4 +66,4 @@ def __init__(self, workflow):
             print("--- %s seconds ---" % (time.time() - start_time))
             timeSpent = "time spent: " + str((time.time() - start_time))
             logging.info(timeSpent)
-            time.sleep(interuptTime)
+            time.sleep(interruptTime)
diff --git a/scheduler/Estimator.py b/scheduler/Estimator.py
index 2d8c7d9..3aa7fab 100644
--- a/scheduler/Estimator.py
+++ b/scheduler/Estimator.py
@@ -784,14 +784,14 @@ def compareDistributions(self, func, vmNum):
         # res = self.distributions_Chisquared_Test(vmDurations, serverlessDuration)
         return res
 
-    def tripleCaseDicision(self, totalVMs):
+    def tripleCaseDecision(self, totalVMs):
         thresholdParam = 0.3
         individualDecisions = []
         for vmNum in range(totalVMs):
             for func in self.workflowFunctions:
-                seperateDicision = self.compareDistributions(func, vmNum)
-                if seperateDicision != "NotFound":
-                    individualDecisions.append(seperateDicision)
+                separateDecision = self.compareDistributions(func, vmNum)
+                if separateDecision != "NotFound":
+                    individualDecisions.append(separateDecision)
         print("individualDecisions, ", individualDecisions)
         if len(individualDecisions) == 0:
             return False
diff --git a/scheduler/README.md b/scheduler/README.md
index 76b67d3..7674000 100644
--- a/scheduler/README.md
+++ b/scheduler/README.md
@@ -1,6 +1,6 @@
 ### Sensitivity of the Solver:
-The solutions found by the GEKKO solver is sensitive to values of RTOL and OTOL, and they need to be adjusted together. A lower RTOL value leads to more precise solutions, but caution must be taken as reducing it too much may result in incorrect solutions by surpassing the computer precision [1].
+The solutions found by the Gekko solver are sensitive to the values of RTOL and OTOL, and they need to be adjusted together. A lower RTOL value leads to more precise solutions, but caution must be taken: reducing it too much may result in incorrect solutions by exceeding the computer's precision [1].
 In our experiments, we found that a value of 1e-13 works for us. Depending on the intended use, host system running the solver, and other factors, adjustments to these values may help you achieve better results.
 You can change RTOL and OTOL by modifying the following code:
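The scheduler/README.md hunk above refers to code that sets RTOL and OTOL, but the snippet itself falls outside this diff. For orientation, RTOL and OTOL are standard Gekko solver options; a minimal sketch of setting them on a model (the model setup is illustrative, not the scheduler's actual code):

```python
# Minimal sketch: tighten Gekko's residual (RTOL) and objective (OTOL)
# tolerances to the 1e-13 value the README reports working.
from gekko import GEKKO

m = GEKKO(remote=False)  # assumption: the solver runs locally
m.options.RTOL = 1e-13
m.options.OTOL = 1e-13
```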
diff --git a/scheduler/garbageCollector.py b/scheduler/garbageCollector.py
index 3ad5e4c..4ac4539 100644
--- a/scheduler/garbageCollector.py
+++ b/scheduler/garbageCollector.py
@@ -31,8 +31,8 @@ def __init__(self):
 
 if __name__ == "__main__":
-    interuptTime = 60 * 60
-    time.sleep(interuptTime)
+    interruptTime = 60 * 60
+    time.sleep(interruptTime)
     while True:
         logging.info("GC is running......")
         logging.info(str(datetime.datetime.now()))
@@ -41,4 +41,4 @@ def __init__(self):
         print("--- %s seconds ---" % (time.time() - start_time))
         timeSpent = "time spent: " + str((time.time() - start_time))
         logging.info(timeSpent)
-        time.sleep(interuptTime)
+        time.sleep(interruptTime)
diff --git a/scheduler/resetLastDecisions.py b/scheduler/resetLastDecisions.py
index d4cacde..0983f3b 100644
--- a/scheduler/resetLastDecisions.py
+++ b/scheduler/resetLastDecisions.py
@@ -8,7 +8,7 @@
 import datetime
 
 
-class resetLastDicision:
+class resetLastDecision:
     def __init__(self, workflow, vmNum, mode):
         workflow = workflow
         vmNum = vmNum
@@ -204,4 +204,4 @@ def __init__(self, workflow, vmNum, mode):
     workflow = sys.argv[1]
     vmNum = int(sys.argv[2])
     mode = sys.argv[3]
-    reset = resetLastDicision(workflow, vmNum, mode)
+    reset = resetLastDecision(workflow, vmNum, mode)
diff --git a/scheduler/resetRoutingDecisions.py b/scheduler/resetRoutingDecisions.py
index 59dd665..51d31a8 100644
--- a/scheduler/resetRoutingDecisions.py
+++ b/scheduler/resetRoutingDecisions.py
@@ -8,7 +8,7 @@
 import configparser
 
 
-class resetDicision:
+class resetDecision:
     def __init__(self, workflow, vmNum):
         self.jsonPath = (
             str(Path(os.path.dirname(os.path.abspath(__file__))).resolve().parents[0])
@@ -81,4 +81,4 @@ def resetRouting(self):
 if __name__ == "__main__":
     workflow = sys.argv[1]
     vmNum = sys.argv[2]
-    reset = resetDicision(workflow, vmNum)
+    reset = resetDecision(workflow, vmNum)
diff --git a/scheduler/rpsCIScheduler.py b/scheduler/rpsCIScheduler.py
index f76f98b..eeed6d5 100644
--- a/scheduler/rpsCIScheduler.py
+++ b/scheduler/rpsCIScheduler.py
@@ -167,7 +167,7 @@ def resolveOffloadingSolutions(self):
         if mode == "cost":
             decisionModes = ["default"]
         else:
-            totalTripleDecisioin = x.tripleCaseDicision(len(cpus))
+            totalTripleDecisioin = x.tripleCaseDecision(len(cpus))
             if totalTripleDecisioin == True:
                 decisionModes = ["default", "worst-case", "best-case"]
                 logging.info("latency mode, similar distributions")
diff --git a/scheduler/rpsMultiHostSolver.py b/scheduler/rpsMultiHostSolver.py
index 42305e3..77624f2 100644
--- a/scheduler/rpsMultiHostSolver.py
+++ b/scheduler/rpsMultiHostSolver.py
@@ -240,7 +240,7 @@ def GetServerlessCostEstimate(self, offloadingCandidate):
     def IsOffloaded(self, offloadingCandidate, vm):
         """
-        Returns previous offloadind decision for the function
+        Returns previous offloading decision for the function
         """
         decision = self.lastDecision[
             self.offloadingCandidates.index(offloadingCandidate)
@@ -270,7 +270,7 @@ def sameVM(self, node, parent):
     def getChildIndexes(self, offloadingCandidate):
         """
-        Returns indexes for the children of a function based on sucessors
+        Returns indexes for the children of a function based on successors
         """
         childrenIndexes = []
         children = self.successors[self.offloadingCandidates.index(offloadingCandidate)]
@@ -280,7 +280,7 @@ def getChildIndexes(self, offloadingCandidate):
     def getParentIndexes(self, offloadingCandidate):
         """
-        Returns indexes for the parents of a function based on sucessors
+        Returns indexes for the parents of a function based on predecessors
         """
         parentIndexes = []
         parents = self.predecessors[
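getChildIndexes and getParentIndexes, whose docstrings are fixed above, resolve a function's neighbors in the workflow DAG back to positional indexes over the offloading candidates. A self-contained toy version of that lookup, with invented data rather than the solver's actual state:

```python
# Toy illustration of mapping successor names to candidate indexes; the
# real solver keeps these lists on self and handles more bookkeeping.
offloadingCandidates = ["A", "B", "C"]
successors = [["B", "C"], [], []]  # A -> B, A -> C
predecessors = [[], ["A"], ["A"]]

def get_child_indexes(candidate):
    children = successors[offloadingCandidates.index(candidate)]
    return [offloadingCandidates.index(c) for c in children]

print(get_child_indexes("A"))  # [1, 2]
```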
diff --git a/tests/host_agents/predictor_test.cpp b/tests/host_agents/predictor_test.cpp
index 83fb0f5..3bc1d55 100644
--- a/tests/host_agents/predictor_test.cpp
+++ b/tests/host_agents/predictor_test.cpp
@@ -13,7 +13,7 @@ int test_buffer_size_check() {
     double cpu_pred_old = 0;
     size_t cpu_violation;
     try {
-        auto pred_result = cpu_predictor.compute_predicton_ExponentialMovingAverage(cpu_pred_old, 0, 1);
+        auto pred_result = cpu_predictor.compute_prediction_ExponentialMovingAverage(cpu_pred_old, 0, 1);
    } catch(std::exception &e) {
         cout << "Exp correctly thrown by EMA predictor when buffer size is zero.\n";
         return 0;
@@ -34,7 +34,7 @@ int test_const_zeros_ema() {
     cpu_utilization_buffer.push(double(0.0));
     double cpu_pred_old = 0;
     size_t cpu_violation;
-    auto pred_result = cpu_predictor.compute_predicton_ExponentialMovingAverage(cpu_pred_old, 0, 1);
+    auto pred_result = cpu_predictor.compute_prediction_ExponentialMovingAverage(cpu_pred_old, 0, 1);
     std::cout << "Predicted Value with zero trace (EMA): " << pred_result.prediction << "\n";
     cpu_violation = pred_result.violation;
     return 0;
@@ -52,7 +52,7 @@ int test_large_values_ema() {
     cpu_utilization_buffer.push(double(1000.0));
     double cpu_pred_old = 0;
     size_t cpu_violation;
-    auto pred_result = cpu_predictor.compute_predicton_ExponentialMovingAverage(cpu_pred_old, 0, 1);
+    auto pred_result = cpu_predictor.compute_prediction_ExponentialMovingAverage(cpu_pred_old, 0, 1);
     std::cout << "Predicted Value with large values (EMA): " << pred_result.prediction << "\n";
     if (pred_result.prediction > 100) {
         std::cout << "Error: predicted value is larger than the largest value in the trace.\n";
@@ -73,17 +73,17 @@ int test_trace1_ema() {
     size_t cpu_violation;
 
     cpu_utilization_buffer.push(double(0));
-    auto pred_result = cpu_predictor.compute_predicton_ExponentialMovingAverage(cpu_pred_old, 0, 1);
+    auto pred_result = cpu_predictor.compute_prediction_ExponentialMovingAverage(cpu_pred_old, 0, 1);
     std::cout << "Real: 10, Pred: " << pred_result.prediction << "\n";
     cpu_pred_old = pred_result.prediction;
 
     for (int i=0; i<10; i++){
         cpu_utilization_buffer.push(double(10));
-        pred_result = cpu_predictor.compute_predicton_ExponentialMovingAverage(cpu_pred_old, 0, 1);
+        pred_result = cpu_predictor.compute_prediction_ExponentialMovingAverage(cpu_pred_old, 0, 1);
         std::cout << "Real: 0, Pred: " << pred_result.prediction << "\n";
         cpu_pred_old = pred_result.prediction;
         cpu_utilization_buffer.push(double(0));
-        pred_result = cpu_predictor.compute_predicton_ExponentialMovingAverage(cpu_pred_old, 0, 1);
+        pred_result = cpu_predictor.compute_prediction_ExponentialMovingAverage(cpu_pred_old, 0, 1);
         std::cout << "Real: 10, Pred: " << pred_result.prediction << "\n";
         cpu_pred_old = pred_result.prediction;
     }
@@ -103,7 +103,7 @@ int test_const_zeros_mc() {
     cpu_utilization_buffer.push(double(0.0));
     double cpu_pred_old = 0;
     size_t cpu_violation;
-    auto pred_result = cpu_predictor.compute_predicton_MarkovChain(cpu_pred_old, 0, 1);
+    auto pred_result = cpu_predictor.compute_prediction_MarkovChain(cpu_pred_old, 0, 1);
     std::cout << "Predicted Value with zero trace (MC): " << pred_result.prediction << "\n";
     return 0;
 }
@@ -120,7 +120,7 @@ int test_large_values_mc() {
     cpu_utilization_buffer.push(double(1000.0));
     double cpu_pred_old = 0;
     size_t cpu_violation;
-    auto pred_result = cpu_predictor.compute_predicton_MarkovChain(cpu_pred_old, 0, 1);
+    auto pred_result = cpu_predictor.compute_prediction_MarkovChain(cpu_pred_old, 0, 1);
     std::cout << "Predicted Value with large values (MC): " << pred_result.prediction << "\n";
     if (pred_result.prediction > 100) {
         std::cout << "Error: predicted value is larger than the largest value in the trace.\n";
@@ -141,17 +141,17 @@ int test_trace1_mc() {
     size_t cpu_violation;
 
     cpu_utilization_buffer.push(double(0));
-    auto pred_result = cpu_predictor.compute_predicton_MarkovChain(cpu_pred_old, 0, 1);
+    auto pred_result = cpu_predictor.compute_prediction_MarkovChain(cpu_pred_old, 0, 1);
     std::cout << "Recent:0, Future:10, Pred:" << pred_result.prediction << "\n";
     cpu_pred_old = pred_result.prediction;
 
     for (int i=0; i<10; i++){
         cpu_utilization_buffer.push(double(10));
-        pred_result = cpu_predictor.compute_predicton_MarkovChain(cpu_pred_old, 0, 1);
+        pred_result = cpu_predictor.compute_prediction_MarkovChain(cpu_pred_old, 0, 1);
         std::cout << "Recent:10, Future:0, Pred:" << pred_result.prediction << "\n";
         cpu_pred_old = pred_result.prediction;
         cpu_utilization_buffer.push(double(0));
-        pred_result = cpu_predictor.compute_predicton_MarkovChain(cpu_pred_old, 0, 1);
+        pred_result = cpu_predictor.compute_prediction_MarkovChain(cpu_pred_old, 0, 1);
         std::cout << "Recent:0, Future:10, Pred:" << pred_result.prediction << "\n";
         cpu_pred_old = pred_result.prediction;
     }