diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index e443b0713177a..27d28e0e0a378 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -1368,6 +1368,25 @@ test_linux_aarch64(){
     inductor/test_torchinductor_codegen_dynamic_shapes \
     inductor/test_torchinductor_dynamic_shapes --verbose
 }
 
+test_operator_benchmark() {
+  TEST_REPORTS_DIR=$(pwd)/test/test-reports
+  TEST_DIR=$(pwd)
+  CORES=$(lscpu | grep Core | awk '{print $4}')
+  end_core=$(( CORES-1 ))
+
+  cd benchmarks/operator_benchmark/pt_extension
+  python setup.py install
+
+  cd "${TEST_DIR}"/benchmarks/operator_benchmark
+  # Pin the run to one socket's physical cores to reduce run-to-run variance.
+  taskset -c 0-"$end_core" python -m benchmark_all_test --device cpu --output-dir "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv"
+
+  python "${TEST_DIR}"/benchmarks/operator_benchmark/check_perf_csv.py \
+    --actual "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" \
+    --expected "${TEST_DIR}"/benchmarks/operator_benchmark_eager_float32_cpu_expected.csv
+}
+
 if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
   (cd test && python -c "import torch; print(torch.__config__.show())")
   (cd test && python -c "import torch; print(torch.__config__.parallel_info())")
@@ -1394,6 +1413,8 @@ elif [[ "$TEST_CONFIG" == distributed ]]; then
   if [[ "${SHARD_NUMBER}" == 1 ]]; then
     test_rpc
   fi
+elif [[ "${TEST_CONFIG}" == *cpu_operator_benchmark* ]]; then
+  test_operator_benchmark
 elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
   test_inductor_distributed
 elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
diff --git a/.github/workflows/operator_benchmark.yml b/.github/workflows/operator_benchmark.yml
new file mode 100644
index 0000000000000..c308d8c7ddfc1
--- /dev/null
+++ b/.github/workflows/operator_benchmark.yml
@@ -0,0 +1,42 @@
+name: operator_benchmark
+
+on:
+  pull_request:
+    branches-ignore:
+      - nightly
+  push:
+    branches:
+      - main
+      - release/*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+permissions: read-all
+
+jobs:
+  linux-jammy-cpu-py3_8-gcc11-opbenchmark-build:
+    name: linux-jammy-cpu-py3.8-gcc11-opbenchmark
+    uses: ./.github/workflows/_linux-build.yml
+    with:
+      build-environment: linux-jammy-py3.8-gcc11-build
+      docker-image-name: pytorch-linux-jammy-py3.8-gcc11-opbenchmark-build
+      test-matrix: |
+        { include: [
+          { config: "cpu_operator_benchmark", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
+        ]}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+
+  linux-jammy-cpu-py3_8-gcc11-opbenchmark-test:
+    name: linux-jammy-cpu-py3.8-gcc11-opbenchmark
+    uses: ./.github/workflows/_linux-test.yml
+    needs: linux-jammy-cpu-py3_8-gcc11-opbenchmark-build
+    with:
+      build-environment: linux-jammy-py3.8-gcc11-build
+      docker-image: ${{ needs.linux-jammy-cpu-py3_8-gcc11-opbenchmark-build.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-jammy-cpu-py3_8-gcc11-opbenchmark-build.outputs.test-matrix }}
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
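`check_perf_csv.py` is invoked by `test_operator_benchmark` above but is not included in this diff. A minimal sketch of what such an expected-vs-actual comparison might look like — the column names mirror the headers that `benchmark_core.py` writes below, while the `--threshold` flag and its 10% tolerance are assumptions for illustration, not the script's actual interface:

```python
# Hypothetical sketch of a check_perf_csv.py-style regression gate; not the
# real script, which is outside this diff.
import argparse
import csv


def read_times(path):
    # "Case Name" and "Execution Time" match the headers written by
    # BenchmarkRunner._output_csv in benchmark_core.py.
    with open(path) as fd:
        return {
            row["Case Name"]: float(row["Execution Time"])
            for row in csv.DictReader(fd)
        }


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--actual", required=True)
    parser.add_argument("--expected", required=True)
    parser.add_argument("--threshold", type=float, default=1.10)  # assumed 10% tolerance
    args = parser.parse_args()

    actual = read_times(args.actual)
    expected = read_times(args.expected)

    # Flag any case that is more than threshold-times slower than its
    # baseline, or that is missing from the new results entirely.
    regressions = [
        name
        for name, baseline in expected.items()
        if actual.get(name, float("inf")) > baseline * args.threshold
    ]
    if regressions:
        raise SystemExit(f"Performance regressions detected: {regressions}")
    print(f"All {len(actual)} benchmark cases are within the threshold.")


if __name__ == "__main__":
    main()
```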
diff --git a/benchmarks/operator_benchmark/benchmark_core.py b/benchmarks/operator_benchmark/benchmark_core.py
index bc340ae17f67d..9e7b223e1b77c 100644
--- a/benchmarks/operator_benchmark/benchmark_core.py
+++ b/benchmarks/operator_benchmark/benchmark_core.py
@@ -1,7 +1,9 @@
 import ast
 import copy
+import csv
 import functools
 import json
+import os
 import timeit
 
 from collections import namedtuple
@@ -188,7 +190,9 @@ def __init__(self, args):
         self.use_jit = args.use_jit
         self.num_runs = args.num_runs
         self.print_per_iter = False
+        self.output_dir = args.output_dir
         self.operator_range = benchmark_utils.get_operator_range(args.operator_range)
+        self.disable_output = args.disable_output
         # 100 is the default warmup iterations
         if self.args.warmup_iterations == -1:
             self.args.warmup_iterations = 100
@@ -396,8 +400,39 @@ def _print_test_case_info(self, test_case):
 
         return False
 
+    def _output_csv(self, filename, headers, row):
+        # Skip writing results when CSV output is disabled.
+        if self.disable_output:
+            return
+        if os.path.exists(filename):
+            with open(filename) as fd:
+                lines = list(csv.reader(fd)) or [[]]
+            if headers and len(headers) > len(lines[0]):
+                # If a prior run failed, the header row may not have been
+                # fully filled in yet, so prefer the longer header.
+                lines[0] = headers
+            else:
+                headers = lines[0]
+        else:
+            lines = [headers]
+        lines.append([(f"{x:.6f}" if isinstance(x, float) else x) for x in row])
+        with open(filename, "w") as fd:
+            writer = csv.writer(fd, lineterminator="\n")
+            for line in lines:
+                writer.writerow(list(line) + ["0"] * (len(headers) - len(line)))
+
     def run(self):
         self._print_header()
+        DEFAULT_OUTPUT_DIR = "benchmark_logs"
+        output_filename = self.output_dir or DEFAULT_OUTPUT_DIR
+        headers = [
+            "Benchmarking Framework",
+            "Benchmarking Module Name",
+            "Case Name",
+            "tag",
+            "run_backward",
+            "Execution Time",
+        ]
 
         for test_metainfo in BENCHMARK_TESTER:
             for test in _build_test(*test_metainfo):
@@ -437,5 +472,18 @@ def run(self):
                     )
                     for _ in range(self.num_runs)
                 ]
-                self._print_perf_result(reported_time, test_case)
+
+                # Append this test case's result to the CSV file.
+                self._output_csv(
+                    output_filename,
+                    headers,
+                    [
+                        test_case.framework,
+                        test_case.op_bench.module_name(),
+                        test_case.test_config.test_name,
+                        test_case.test_config.tag,
+                        test_case.test_config.run_backward,
+                        reported_time[0],
+                    ],
+                )
 
diff --git a/benchmarks/operator_benchmark/benchmark_runner.py b/benchmarks/operator_benchmark/benchmark_runner.py
index 0695e4847e76a..f34a4e39e4dbf 100644
--- a/benchmarks/operator_benchmark/benchmark_runner.py
+++ b/benchmarks/operator_benchmark/benchmark_runner.py
@@ -143,6 +143,17 @@ def parse_args():
         default="None",
     )
 
+    parser.add_argument(
+        "--output-dir",
+        help="Path of the CSV file where benchmark results are saved",
+        default=None,
+    )
+    parser.add_argument(
+        "--disable-output",
+        action="store_true",
+        help="Disable writing benchmark results to the CSV file",
+    )
+
     args, _ = parser.parse_known_args()
 
     if args.omp_num_threads:
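The CSV accumulation that `_output_csv` implements above can be exercised in isolation, which is handy when adjusting the header-merge or row-padding behavior. A self-contained sketch of the same strategy; the file name and benchmark rows are made up for illustration:

```python
import csv
import os


def output_csv(filename, headers, row):
    # Mirrors BenchmarkRunner._output_csv above: reuse the header row of an
    # existing file when appending, and pad short rows with "0".
    if os.path.exists(filename):
        with open(filename) as fd:
            lines = list(csv.reader(fd)) or [[]]
        if headers and len(headers) > len(lines[0]):
            lines[0] = headers
        else:
            headers = lines[0]
    else:
        lines = [headers]
    lines.append([f"{x:.6f}" if isinstance(x, float) else x for x in row])
    with open(filename, "w") as fd:
        writer = csv.writer(fd, lineterminator="\n")
        for line in lines:
            writer.writerow(list(line) + ["0"] * (len(headers) - len(line)))


# Two successive calls append to the same file, just as successive test
# cases do during a benchmark run.
output_csv("demo.csv", ["Case Name", "Execution Time"], ["add_M64_N64_cpu", 123.456789])
output_csv("demo.csv", ["Case Name", "Execution Time"], ["mul_M64_N64_cpu", 98.765432])
```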