diff --git a/prepare/metrics/llm_as_judge/direct/llama_3_3_70b_instruct_adherence_completeness.py b/prepare/metrics/llm_as_judge/direct/llama_3_3_70b_instruct_adherence_completeness.py
deleted file mode 100644
index d8e1a44cb4..0000000000
--- a/prepare/metrics/llm_as_judge/direct/llama_3_3_70b_instruct_adherence_completeness.py
+++ /dev/null
@@ -1,129 +0,0 @@
-from unitxt import add_to_catalog
-from unitxt.inference import CrossProviderInferenceEngine
-from unitxt.llm_as_judge import LLMJudgeDirect
-from unitxt.llm_as_judge_constants import (
-    CriteriaWithOptions,
-)
-
-option_map = {
-    "Excellent": 1.0,
-    "Good": 0.75,
-    "mediocre": 0.5,
-    "Bad": 0.25,
-    "Very Bad": 0,
-}
-
-# First, describe a judgement criteria
-adherence_criteria = CriteriaWithOptions.from_obj(
-    {
-        "name": "adherence_with_format",
-        "description": "The response aligns with the requested structure, style, or format (e.g., bullet points, headings, specific phrasing).",
-        "options": [
-            {
-                "name": "Excellent",
-                "description": "The response perfectly aligns with the requested structure, style, or format, with no deviations.",
-            },
-            {
-                "name": "Good",
-                "description": "The response aligns well with the requested structure, style, or format, with minor deviations that do not affect clarity or usability.",
-            },
-            {
-                "name": "mediocre",
-                "description": "The response generally follows the requested structure, style, or format, but noticeable inconsistencies or omissions are present.",
-            },
-            {
-                "name": "Bad",
-                "description": "The response only partially aligns with the requested structure, style, or format, with significant inconsistencies or a lack of adherence.",
-            },
-            {
-                "name": "Very Bad",
-                "description": "The response fails to align with the requested structure, style, or format.",
-            },
-        ],
-        "option_map": option_map,
-    }
-)
-add_to_catalog(
-    adherence_criteria,
-    f"metrics.llm_as_judge.direct.criteria.{adherence_criteria.name}",
-    overwrite=True,
-)
-
-completeness_criteria = CriteriaWithOptions.from_obj(
-    {
-        "name": "answer_completeness",
-        "description": "The response is complete: all the aspects of the reference answer are addressed in the response. The "
-        "response might use different phrasing or wording from the reference answer.",
-        "options": [
-            {
-                "name": "Excellent",
-                "description": "The response addresses all aspects of the reference answer.",
-            },
-            {
-                "name": "Good",
-                "description": "The response addresses most aspects of the reference answer, with minor omissions.",
-            },
-            {
-                "name": "mediocre",
-                "description": "The response covers the essential aspects of the reference answer but has notable omissions.",
-            },
-            {
-                "name": "Bad",
-                "description": "The response covers only a few aspects of the reference answer, with significant omissions.",
-            },
-            {
-                "name": "Very Bad",
-                "description": "The response fails to address the reference answer meaningfully, with most aspects omitted.",
-            },
-        ],
-        "option_map": option_map,
-    }
-)
-add_to_catalog(
-    completeness_criteria,
-    f"metrics.llm_as_judge.direct.criteria.{completeness_criteria.name}",
-    overwrite=True,
-)
-
-
-# now = define the judge metric using the criteria
-adherence_metric = LLMJudgeDirect(
-    inference_engine=CrossProviderInferenceEngine(  # or your favorite inference model
-        model="llama-3-3-70b-instruct", max_tokens=1024, temperature=0, provider="watsonx"
-    ),
-    criteria=adherence_criteria,
-    # the fields from the generation task to be presented to the judge. Those fields must be present
-    # in the generation task so they can be embedded here
-    context_fields={
-        "question": "question",
-        "instructions": "metadata/template/instruction",
-    },
-    criteria_field="criteria",
-    generate_summaries=False,
-    check_positional_bias=False,
-)
-add_to_catalog(
-    adherence_metric,
-    "metrics.rag.response_generation.adherence_with_format.llama_3_3_70b_instruct_judge",
-    overwrite=True,
-)
-
-# now = define the judge metric using the criteria
-completeness_metric = LLMJudgeDirect(
-    inference_engine=CrossProviderInferenceEngine(  # or your favorite inference model
-        model="llama-3-3-70b-instruct", max_tokens=1024, temperature=0
-    ),
-    criteria=completeness_criteria,
-    # the fields from the generation task to be presented to the judge. Those fields must be present
-    # in the generation task so they can be embedded here
-    context_fields={"question": "question", "reference_answers": "reference_answers"},
-    criteria_field="criteria",
-    generate_summaries=False,
-    check_positional_bias=False,
-)
-
-add_to_catalog(
-    completeness_metric,
-    "metrics.rag.response_generation.answer_completeness.llama_3_3_70b_instruct_judge",
-    overwrite=True,
-)
diff --git a/prepare/metrics/llm_as_judge/llm_as_judge.py b/prepare/metrics/llm_as_judge/llm_as_judge.py
index 9a2e3c02dd..9cffa4621b 100644
--- a/prepare/metrics/llm_as_judge/llm_as_judge.py
+++ b/prepare/metrics/llm_as_judge/llm_as_judge.py
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Optional, Union
 
 from unitxt import add_to_catalog, get_logger
 from unitxt.inference import CrossProviderInferenceEngine
@@ -8,6 +8,7 @@
     EVALUATOR_TO_MODEL_ID,
     EVALUATORS_METADATA,
     PAIRWISE_CRITERIA,
+    EvaluatorMetadata,
     EvaluatorNameEnum,
     EvaluatorTypeEnum,
     ModelProviderEnum,
@@ -16,17 +17,24 @@
 
 logger = get_logger()
 
-
 def get_evaluator(
     name: EvaluatorNameEnum,
     evaluator_type: EvaluatorTypeEnum,
-    provider: ModelProviderEnum,
+    provider: Optional[ModelProviderEnum] = None,
+    evaluator_params: Optional[dict] = None,
 ) -> Union[LLMJudgeDirect, LLMJudgePairwise]:
     evaluator_metadata = get_evaluator_metadata(name)
-    inference_params = {"max_tokens": 1024, "seed": 42, "temperature": 0, "provider": provider.value}
+    inference_params = {
+        "max_tokens": 1024,
+        "seed": 42,
+        "temperature": 0,
+    }
+    if provider is not None:
+        inference_params["provider"] = provider.value
+
     model_name = EVALUATOR_TO_MODEL_ID[name]
-    if provider == ModelProviderEnum.AZURE_OPENAI:
+    if provider is not None and provider == ModelProviderEnum.AZURE_OPENAI:
         inference_params["credentials"] = {}
         inference_params["credentials"]["api_base"] = (
             f"https://eteopenai.azure-api.net/openai/deployments/{model_name}/chat/completions?api-version=2024-08-01-preview"
@@ -42,6 +50,9 @@ def get_evaluator(
         "generate_summaries": False,
     }
 
+    if evaluator_params is not None:
+        params.update(evaluator_params)
+
     evaluator_klass = (
         LLMJudgeDirect
         if evaluator_type == EvaluatorTypeEnum.DIRECT
@@ -51,6 +62,28 @@ def get_evaluator(
     return evaluator_klass(**params)
 
 
+def get_evaluator_catalog_name(
+    evaluator_metadata: EvaluatorMetadata,
+    provider: ModelProviderEnum,
+    prefix: str = "",
+):
+    metric_name = (
+        evaluator_metadata.name.value.lower()
+        .replace("-", "_")
+        .replace(".", "_")
+        .replace(" ", "_")
+    )
+    provider_name = ""
+    # for backward compatibility, ideally we would use cross inference engines provider ids
+    if provider == ModelProviderEnum.AZURE_OPENAI:
+        provider_name = "azure_openai"
+    elif provider == ModelProviderEnum.OPENAI:
+        provider_name = "openai"
+    else:
+        provider_name = provider.value.lower()
+    return f"metrics.{prefix}.{provider_name}.{metric_name}"
+
+
 logger.debug("Registering criteria...")
 # Register all the predefined criterisa
 for criteria in DIRECT_CRITERIA:
@@ -67,36 +100,53 @@ def get_evaluator(
         overwrite=True,
     )
 
-logger.debug("Registering evaluators...")
+
+logger.debug("Registering generic judges (no criterion is set)...")
 for evaluator_metadata in EVALUATORS_METADATA:
     for provider in evaluator_metadata.providers:
         for evaluator_type in [
             EvaluatorTypeEnum.DIRECT,
             EvaluatorTypeEnum.PAIRWISE,
         ]:
-            evaluator = get_evaluator(
-                name=evaluator_metadata.name,
-                evaluator_type=evaluator_type,
-                provider=provider,
-            )
-
-            metric_name = (
-                evaluator_metadata.name.value.lower()
-                .replace("-", "_")
-                .replace(".", "_")
-                .replace(" ", "_")
-            )
-            provider_name = ""
-            # for backward compatibility, ideally we would use cross inference engines provider ids
-            if provider == ModelProviderEnum.AZURE_OPENAI:
-                provider_name = "azure_openai"
-            elif provider == ModelProviderEnum.OPENAI:
-                provider_name = "openai"
-            else:
-                provider_name = provider.value.lower()
-            add_to_catalog(
-                evaluator,
-                f"metrics.llm_as_judge.{evaluator_type.value}.{provider_name}.{metric_name}",
+                get_evaluator(
+                    name=evaluator_metadata.name,
+                    evaluator_type=evaluator_type,
+                    provider=provider,
+                ),
+                get_evaluator_catalog_name(evaluator_metadata, provider, f"llm_as_judge.{evaluator_type.value}"),
                 overwrite=True,
             )
+
+logger.debug("Registering judges with a specific criterion...")
+add_to_catalog(
+    get_evaluator(
+        name=EvaluatorNameEnum.LLAMA3_3_70B,
+        evaluator_type=EvaluatorTypeEnum.DIRECT,
+        # provider=ModelProviderEnum.WATSONX,
+        evaluator_params={
+            "criteria": "metrics.llm_as_judge.direct.criteria.adherence_with_format",
+            "context_fields": {
+                "question": "question",
+                "instructions": "metadata/template/instruction",
+            },
+        },
+    ),
+    "metrics.rag.response_generation.adherence_with_format.llama_3_3_70b_instruct_judge",
+    overwrite=True,
+)
+
+
+add_to_catalog(
+    get_evaluator(
+        name=EvaluatorNameEnum.LLAMA3_3_70B,
+        evaluator_type=EvaluatorTypeEnum.DIRECT,
+        # provider=ModelProviderEnum.WATSONX,
+        evaluator_params={
+            "criteria": "metrics.llm_as_judge.direct.criteria.answer_completeness",
+            "context_fields": {"question": "question", "reference_answers": "reference_answers"},
+        },
+    ),
+    "metrics.rag.response_generation.answer_completeness.llama_3_3_70b_instruct_judge",
+    overwrite=True,
+)
 
diff --git a/src/unitxt/catalog/metrics/rag/response_generation/adherence_with_format/llama_3_3_70b_instruct_judge.json b/src/unitxt/catalog/metrics/rag/response_generation/adherence_with_format/llama_3_3_70b_instruct_judge.json
index 3c92fc074d..a29b6f4731 100644
--- a/src/unitxt/catalog/metrics/rag/response_generation/adherence_with_format/llama_3_3_70b_instruct_judge.json
+++ b/src/unitxt/catalog/metrics/rag/response_generation/adherence_with_format/llama_3_3_70b_instruct_judge.json
@@ -2,55 +2,16 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "cross_provider_inference_engine",
-        "model": "llama-3-3-70b-instruct",
         "max_tokens": 1024,
+        "seed": 42,
         "temperature": 0,
-        "provider": "watsonx"
-    },
-    "criteria": {
-        "__type__": "criteria_with_options",
-        "name": "adherence_with_format",
-        "description": "The response aligns with the requested structure, style, or format (e.g., bullet points, headings, specific phrasing).",
-        "options": [
-            {
-                "__type__": "criteria_option",
-                "name": "Excellent",
-                "description": "The response perfectly aligns with the requested structure, style, or format, with no deviations."
-            },
-            {
-                "__type__": "criteria_option",
-                "name": "Good",
-                "description": "The response aligns well with the requested structure, style, or format, with minor deviations that do not affect clarity or usability."
-            },
-            {
-                "__type__": "criteria_option",
-                "name": "mediocre",
-                "description": "The response generally follows the requested structure, style, or format, but noticeable inconsistencies or omissions are present."
-            },
-            {
-                "__type__": "criteria_option",
-                "name": "Bad",
-                "description": "The response only partially aligns with the requested structure, style, or format, with significant inconsistencies or a lack of adherence."
-            },
-            {
-                "__type__": "criteria_option",
-                "name": "Very Bad",
-                "description": "The response fails to align with the requested structure, style, or format."
-            }
-        ],
-        "option_map": {
-            "Excellent": 1.0,
-            "Good": 0.75,
-            "mediocre": 0.5,
-            "Bad": 0.25,
-            "Very Bad": 0
-        }
+        "model": "llama-3-3-70b-instruct"
     },
+    "evaluator_name": "LLAMA3_3_70B",
+    "generate_summaries": false,
+    "criteria": "metrics.llm_as_judge.direct.criteria.adherence_with_format",
     "context_fields": {
         "question": "question",
         "instructions": "metadata/template/instruction"
-    },
-    "criteria_field": "criteria",
-    "generate_summaries": false,
-    "check_positional_bias": false
+    }
 }
diff --git a/src/unitxt/catalog/metrics/rag/response_generation/answer_completeness/llama_3_3_70b_instruct_judge.json b/src/unitxt/catalog/metrics/rag/response_generation/answer_completeness/llama_3_3_70b_instruct_judge.json
index 03498fb68b..e99ed6696a 100644
--- a/src/unitxt/catalog/metrics/rag/response_generation/answer_completeness/llama_3_3_70b_instruct_judge.json
+++ b/src/unitxt/catalog/metrics/rag/response_generation/answer_completeness/llama_3_3_70b_instruct_judge.json
@@ -2,54 +2,16 @@
     "__type__": "llm_judge_direct",
     "inference_engine": {
         "__type__": "cross_provider_inference_engine",
-        "model": "llama-3-3-70b-instruct",
         "max_tokens": 1024,
-        "temperature": 0
-    },
-    "criteria": {
-        "__type__": "criteria_with_options",
-        "name": "answer_completeness",
-        "description": "The response is complete: all the aspects of the reference answer are addressed in the response. The response might use different phrasing or wording from the reference answer.",
-        "options": [
-            {
-                "__type__": "criteria_option",
-                "name": "Excellent",
-                "description": "The response addresses all aspects of the reference answer."
-            },
-            {
-                "__type__": "criteria_option",
-                "name": "Good",
-                "description": "The response addresses most aspects of the reference answer, with minor omissions."
-            },
-            {
-                "__type__": "criteria_option",
-                "name": "mediocre",
-                "description": "The response covers the essential aspects of the reference answer but has notable omissions."
-            },
-            {
-                "__type__": "criteria_option",
-                "name": "Bad",
-                "description": "The response covers only a few aspects of the reference answer, with significant omissions."
-            },
-            {
-                "__type__": "criteria_option",
-                "name": "Very Bad",
-                "description": "The response fails to address the reference answer meaningfully, with most aspects omitted."
-            }
-        ],
-        "option_map": {
-            "Excellent": 1.0,
-            "Good": 0.75,
-            "mediocre": 0.5,
-            "Bad": 0.25,
-            "Very Bad": 0
-        }
+        "seed": 42,
+        "temperature": 0,
+        "model": "llama-3-3-70b-instruct"
     },
+    "evaluator_name": "LLAMA3_3_70B",
+    "generate_summaries": false,
+    "criteria": "metrics.llm_as_judge.direct.criteria.answer_completeness",
     "context_fields": {
         "question": "question",
         "reference_answers": "reference_answers"
-    },
-    "criteria_field": "criteria",
-    "generate_summaries": false,
-    "check_positional_bias": false
+    }
 }
diff --git a/src/unitxt/llm_as_judge_constants.py b/src/unitxt/llm_as_judge_constants.py
index 0dfd70638e..cb72cbf7c8 100644
--- a/src/unitxt/llm_as_judge_constants.py
+++ b/src/unitxt/llm_as_judge_constants.py
@@ -953,6 +953,74 @@ class DirectCriteriaCatalogEnum(Enum):
         },
     )
 
+    ADHERENCE_WITH_FORMAT = CriteriaWithOptions(
+        "adherence_with_format",
+        "The response aligns with the requested structure, style, or format (e.g., bullet points, headings, specific phrasing).",
+        [
+            CriteriaOption(
+                "Excellent",
+                "The response perfectly aligns with the requested structure, style, or format, with no deviations.",
+            ),
+            CriteriaOption(
+                "Good",
+                "The response aligns well with the requested structure, style, or format, with minor deviations that do not affect clarity or usability.",
+            ),
+            CriteriaOption(
+                "mediocre",
+                "The response generally follows the requested structure, style, or format, but noticeable inconsistencies or omissions are present.",
+            ),
+            CriteriaOption(
+                "Bad",
+                "The response only partially aligns with the requested structure, style, or format, with significant inconsistencies or a lack of adherence.",
+            ),
+            CriteriaOption(
+                "Very Bad",
+                "The response fails to align with the requested structure, style, or format.",
+            ),
+        ],
+        {
+            "Excellent": 1.0,
+            "Good": 0.75,
+            "mediocre": 0.5,
+            "Bad": 0.25,
+            "Very Bad": 0,
+        },
+    )
+
+    ANSWER_COMPLETENESS = CriteriaWithOptions(
+        "answer_completeness",
+        "The response is complete: all the aspects of the reference answer are addressed in the response. The response might use different phrasing or wording from the reference answer.",
+        [
+            CriteriaOption(
+                "Excellent",
+                "The response addresses all aspects of the reference answer.",
+            ),
+            CriteriaOption(
+                "Good",
+                "The response addresses most aspects of the reference answer, with minor omissions.",
+            ),
+            CriteriaOption(
+                "mediocre",
+                "The response covers the essential aspects of the reference answer but has notable omissions.",
+            ),
+            CriteriaOption(
+                "Bad",
+                "The response covers only a few aspects of the reference answer, with significant omissions.",
+            ),
+            CriteriaOption(
+                "Very Bad",
+                "The response fails to address the reference answer meaningfully, with most aspects omitted.",
+            ),
+        ],
+        {
+            "Excellent": 1.0,
+            "Good": 0.75,
+            "mediocre": 0.5,
+            "Bad": 0.25,
+            "Very Bad": 0,
+        },
+    )
+
 
 DIRECT_CRITERIA = [c.value for c in DirectCriteriaCatalogEnum]
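For reference, the criteria this patch adds to DirectCriteriaCatalogEnum can still be used to assemble a criterion-specific judge by hand, the same way the deleted prepare script did. A minimal sketch follows; the provider and generation settings are illustrative assumptions rather than anything the patch requires:

from unitxt.inference import CrossProviderInferenceEngine
from unitxt.llm_as_judge import LLMJudgeDirect
from unitxt.llm_as_judge_constants import DirectCriteriaCatalogEnum

# Rebuild the answer-completeness judge outside the catalog, taking the
# criterion from the enum member introduced in this patch.
completeness_judge = LLMJudgeDirect(
    inference_engine=CrossProviderInferenceEngine(
        model="llama-3-3-70b-instruct",
        max_tokens=1024,
        temperature=0,
        provider="watsonx",  # assumed provider for illustration; any supported provider works
    ),
    criteria=DirectCriteriaCatalogEnum.ANSWER_COMPLETENESS.value,
    # fields of the generation task that are shown to the judge
    context_fields={"question": "question", "reference_answers": "reference_answers"},
    generate_summaries=False,
)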
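Likewise, a quick way to sanity-check the slimmed-down JSON artifacts above is to load them back by name. This sketch assumes the installed unitxt version exposes get_from_catalog alongside add_to_catalog in its top-level API, and that the string-valued criteria reference is resolved when the artifact is loaded:

from unitxt import get_from_catalog

# Load the serialized judge registered under the RAG response-generation metrics.
judge = get_from_catalog(
    "metrics.rag.response_generation.answer_completeness.llama_3_3_70b_instruct_judge"
)
print(type(judge).__name__)  # expected: LLMJudgeDirect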