# semgrep.yml

# This file contains Semgrep rules.
# See https://semgrep.dev for more information.
#
# You can use this file locally either with:
#  - docker run --rm -v "${PWD}:/home/repo" returntocorp/semgrep:develop --config semgrep.yml
# or if you have already installed semgrep:
#  - semgrep --config semgrep.yml .
#
# This file is also used in CI, see .circleci/config.yml
#
# Put semgrep-specific rules here.
# Put general OCaml or Python rules in the semgrep-rules repository
# under ocaml/ or python/.

rules:
  # Reports OCaml calls to `pr` or `print_string`, except when they appear
  # inside an `if !Flag.debug then ...` branch or inside a let-binding that
  # carries the `[@@action]` attribute. Files matching `paths.exclude` are
  # not checked.
  - id: no-print-in-semgrep
    severity: ERROR
    languages: [ocaml]
    message: >-
      you should not print anything on stdout as it may interfere with
      the JSON output we must return to the semgrep python wrapper.
    patterns:
      # What is reported.
      - pattern-either:
          - pattern: 'pr ...'
          - pattern: 'print_string ...'
      # Allowed: printing guarded by the debug flag.
      - pattern-not-inside: |
          if !Flag.debug
          then ...
      # Allowed: printing inside a binding annotated with [@@action].
      - pattern-not-inside: |
          let $F ... =
             ...
          [@@action]
    paths:
      exclude:
        - core_cli/*.ml
        - cli/*.ml
        - cli_*/*.ml
        - Test.ml
        - Matching_report.ml
        - Matching_explanation.ml
        - Unit_*.ml
        - Test_*.ml
        - runner/*.ml
        - experiments/*
        - Check_*.ml
        - libs/*
        - languages/*
        - scripts/*
        - tools/*

  # Reports `import unittest.mock` in Python files under `tests/`.
  - id: use-pytest-mock
    severity: ERROR
    languages: [python]
    message: >-
      Instead of importing unittest.mock, use the pytest-mock plugin
      by requesting the `mocker` fixture.
    pattern: 'import unittest.mock'
    paths:
      include:
        - tests/

  # Reports every Python `global` declaration; the message interpolates the
  # matched variable name through `$VAR`.
  - id: use-state-for-global-settings
    severity: ERROR
    languages: [python]
    pattern: 'global $VAR'
    message: |
      Instead of setting global variables,
      keep your variables around on the semgrep.state.SemgrepState class.
      You'll then be able to access this anywhere with:

      from semgrep.state import get_state
      $VAR = get_state().$VAR

  # Reports direct references to the listed `Pcre` functions in OCaml code.
  # `SPcre.ml` itself is excluded from the check.
  - id: not-using-our-pcre-wrappers
    severity: ERROR
    languages: [ocaml]
    message: >-
      You should use one of the equivalent functions in SPcre, which
      automatically sets some flags and handles exceptions.
    # A single pattern-either needs no enclosing `patterns:` conjunction.
    pattern-either:
      - pattern: Pcre.regexp
      - pattern: Pcre.pmatch
      - pattern: Pcre.exec
      - pattern: Pcre.exec_all
      - pattern: Pcre.split
    paths:
      exclude:
        - SPcre.ml

  # Reports references to `List.map2` and offers `Common.map2` as an autofix.
  # Only paths matching `src/*` are checked, minus the excluded directories.
  - id: no-list-map2
    severity: ERROR
    languages: [ocaml]
    message: >-
      `List.map2` creates O(N) stack depth, and can lead to a
      stack overflow. Use `Common.map2` instead.
    pattern: 'List.map2'
    fix: 'Common.map2'
    paths:
      include:
        - src/*
      exclude:
        - profiling/*
        - ppx_profiling/*
        - commons/*
        - graph_code/*
        - lib_parsing/*

  # Reports references to `List.map` and offers `Common.map` as an autofix.
  # Only paths matching `src/*` are checked, minus the excluded directories.
  - id: no-list-map
    severity: ERROR
    languages: [ocaml]
    message: >-
      `List.map` creates O(N) stack depth, and can lead to a
      stack overflow. Use `Common.map` instead.
    pattern: 'List.map'
    fix: 'Common.map'
    paths:
      include:
        - src/*
      exclude:
        - profiling/*
        - ppx_profiling/*
        - commons/*
        - graph_code/*
        - lib_parsing/*

  # Reports references to `List.filter_map` and offers `Common.map_filter`
  # as an autofix. Only paths matching `src/*` are checked, minus the
  # excluded directories.
  - id: no-list-filter-map
    severity: ERROR
    languages: [ocaml]
    message: >-
      `List.filter_map` creates O(N) stack depth, and can lead to a
      stack overflow. Use `Common.map_filter` instead.
    pattern: 'List.filter_map'
    fix: 'Common.map_filter'
    paths:
      include:
        - src/*
      exclude:
        - profiling/*
        - ppx_profiling/*
        - commons/*
        - graph_code/*
        - lib_parsing/*

  # Reports references to `List.mapi` and offers `Common.mapi` as an autofix.
  # Only paths matching `src/*` are checked, minus the excluded directories.
  - id: no-list-mapi
    severity: ERROR
    languages: [ocaml]
    message: >-
      `List.mapi` creates O(N) stack depth, and can lead to a
      stack overflow. Use `Common.mapi` instead.
    pattern: 'List.mapi'
    fix: 'Common.mapi'
    paths:
      include:
        - src/*
      exclude:
        - profiling/*
        - ppx_profiling/*
        - commons/*
        - graph_code/*
        - lib_parsing/*

  # Reports a `List.map` / `Common.map` whose result is passed straight to
  # `List.flatten` or `List.concat`, in piped and in direct-call form.
  # Only paths matching `src/*` are checked.
  - id: use-concat-map
    severity: ERROR
    languages: [ocaml]
    message: >-
      `List.concat_map` is more efficient and more readable than a `map` followed
      by `concat`.
    pattern-either:
      # map ... |> flatten/concat
      - pattern: 'List.map ... |> List.flatten'
      - pattern: 'Common.map ... |> List.flatten'
      - pattern: 'List.map ... |> List.concat'
      - pattern: 'Common.map ... |> List.concat'
      # ... |> map ... |> flatten/concat
      - pattern: '... |> List.map ... |> List.flatten'
      - pattern: '... |> Common.map ... |> List.flatten'
      - pattern: '... |> List.map ... |> List.concat'
      - pattern: '... |> Common.map ... |> List.concat'
      # flatten/concat ( map ... )
      - pattern: 'List.flatten ( List.map ... )'
      - pattern: 'List.flatten ( Common.map ... )'
      - pattern: 'List.concat ( List.map ... )'
      - pattern: 'List.concat ( Common.map ... )'
    paths:
      include:
        - src/*

  # Reports references to `AST_generic.<name>` where <name> matches the regex
  # `equal.*`, unless the reference sits inside a call to
  # `AST_utils.with_structural_equal` or `AST_utils.with_referential_equal`.
  # Only paths matching `src/*` are checked.
  - id: safe-ast-equality
    severity: ERROR
    languages: [ocaml]
    message: >-
      The autogenerated AST_generic equality functions must be wrapped in either
      `AST_utils.with_structural_equal` or `AST_utils.with_referential_equal`.
      Otherwise, they will raise if any statements are compared. See
      `AST_utils.ml` for more information.
    patterns:
      - pattern: 'AST_generic.$X'
      - metavariable-regex:
          metavariable: $X
          regex: 'equal.*'
      - pattern-not-inside: 'AST_utils.with_structural_equal ...'
      - pattern-not-inside: 'AST_utils.with_referential_equal ...'
      # Literals can't contain statements within them, so this is safe
      - pattern-not: 'AST_generic.equal_literal'
    paths:
      include:
        - src/*

  # Reports `sys.exit(1)` in Python files matching `cli/*` and offers
  # `sys.exit(2)` as an autofix.
  - id: no-exit-code-1-in-semgrep
    severity: ERROR
    languages: [python]
    message: >-
      Exit code 1 is reserved for notifying users that blocking findings were found.
      Please use a different exit code, or better yet, a SemgrepError exception.
      For generic fatal errors, we use exit code 2.
    pattern: 'sys.exit(1)'
    fix: 'sys.exit(2)'
    paths:
      include:
        - cli/*

  # Reports references to `os.getenv` or `os.environ` that are not inside a
  # function definition. Only paths matching `cli/src/*` are checked.
  - id: no-env-vars-on-top-level
    severity: ERROR
    languages: [python]
    message: >-
      If you access environment variables on the top level of a module,
      it'll be near impossible to mock the value of that variable in tests.

      Please make sure to only access environment variables in functions,
      preferably in semgrep.env.Env
    patterns:
      - pattern-either:
          - pattern: 'os.getenv'
          - pattern: 'os.environ'
      # Anything inside a `def` body is allowed.
      - pattern-not-inside: 'def $F(...): ...'
    paths:
      include:
        - cli/src/*

  # Reports function parameters whose name matches `^run_semgrep.*$` and that
  # are not annotated as `tests.fixtures.RunSemgrep`. The finding is focused
  # on the parameter itself. Only paths matching `cli/tests/*` are checked.
  - id: typehint-run-semgrep
    severity: ERROR
    languages: [python]
    message: >-
      Please add a type hint for your use of $FIXTURE.
      It should look like this: `$FIXTURE: RunSemgrep`,
      and you should import it like this: `from tests.fixtures import RunSemgrep`.
    patterns:
      - focus-metavariable: $FIXTURE
      # Any parameter of any function...
      - pattern-inside: |
          def $FUNC(..., $FIXTURE, ...): ...
      # ...that does not already carry the expected annotation...
      - pattern-not-inside: |
          def $FUNC(..., $FIXTURE: tests.fixtures.RunSemgrep, ...): ...
      # ...and whose name starts with `run_semgrep`.
      - metavariable-regex:
          metavariable: $FIXTURE
          regex: '^run_semgrep.*$'
    paths:
      include:
        - cli/tests/*

  # Reports calls to any `subprocess.<method>` or to
  # `semgrep.util.sub_check_output` whose first argument is a list starting
  # with "git". Only paths matching `cli/src/*` are checked.
  - id: use-git-check-output-helper
    severity: ERROR
    languages: [python]
    message: >-
      We have a helper function git_check_output in meta.py that
      handles printing nice error+debug messages on failure. Use
      that instead of using subprocess
    pattern-either:
      - pattern: 'subprocess.$METHOD(["git", ...], ...)'
      - pattern: 'semgrep.util.sub_check_output(["git", ...], ...)'
    paths:
      include:
        - cli/src/*
  # not ready yet
  #  - id: no-exit-in-semgrep
  #    pattern: |
  #      exit $X
  #    message: do not use directly exit. raise instead UnixExit $X
  #    languages: [ocaml]
  #    severity: ERROR

  # Rules that used to be in pfff.yml and could be put in p/ocaml at some point
  # Reports any reference to a member of the `Pervasives` module.
  - id: bad-pervasives
    severity: ERROR
    languages: [ocaml]
    message: 'Pervasives is deprecated and not available after 4.10. Use Stdlib.'
    pattern: 'Pervasives.$X'

  # Reports every use of the `!=` operator in OCaml code.
  - id: physical-inequality
    severity: WARNING
    languages: [ocaml]
    message: 'You probably want structural inequality with <>'
    pattern: '$X != $Y'

  # Reports `=` comparisons whose two operands are the same expression.
  - id: stupid-equal
    severity: ERROR
    languages: [ocaml]
    message: 'this will always be true'
    pattern: '$X = $X'

  # Reports comparisons of `List.length <expr>` against 0.
  - id: length-vs-equal
    severity: WARNING
    languages: [ocaml]
    message: 'you probably want $X = [], which is faster'
    pattern: 'List.length $X = 0'

  # Reports OCaml string literals that match the `\/tmp` regex below, i.e.
  # hard-coded `/tmp` paths. The files under `paths.exclude` are skipped.
  - id: not-portable-tmp
    pattern: |
      "=~/\/tmp/"
    # The Stdlib function is `Filename.get_temp_dir_name : unit -> string`;
    # there is no `Filename.get_temp_dirname`.
    message: you should probably use Filename.get_temp_dir_name ().
    languages: [ocaml]
    severity: WARNING
    # TODO: fix at some point
    paths:
      exclude:
        - commons/common2.ml
        - commons_wrappers/graph/graphe.ml
        - h_visualization/plot_jgraph.ml
        - lang_c/analyze/test_analyze_c.ml
        - lang_cpp/parsing/test_parsing_cpp.ml
        - lang_php/analyze/foundation/unit_foundation_php.ml
        - lang_php/analyze/foundation/unit_prolog_php.ml
        - main_db.ml
        - tests/Test.ml
  # Matches any content (`.*`) in files under `changelog.d/`, so a finding is
  # produced for every such file that is not excluded below. The exclusions
  # are the fixed helper files plus the four allowed extensions.
  - id: bad-changelog-extension
    severity: WARNING
    languages: [generic]
    message: |
      This file has an invalid extension. Please choose from one of the allowed
      extensions: .added, .changed, .fixed, or .infra
    pattern-regex: '.*'
    paths:
      include:
        - changelog.d/*
      exclude:
        # Helper files that are not changelog entries.
        - changelog.d/.gitignore
        - changelog.d/README
        - changelog.d/gh-1234.example
        # Allowed changelog entry extensions.
        - changelog.d/*.added
        - changelog.d/*.changed
        - changelog.d/*.fixed
        - changelog.d/*.infra
  # Reports calls to `rich.progress.track(...)` that do not pass
  # `console=semgrep.console.console`.
  - id: pass-console-to-rich-progress
    patterns:
      - pattern: rich.progress.track(...)
      - pattern-not: rich.progress.track(..., console=semgrep.console.console)
    message: >-
      You need to pass our custom console to rich progress bars
      so that it prints to the correct output stream (usually stderr).
    languages: [python]
    severity: ERROR

  # Reports references to `List.hd` and offers
  # `Common.hd_exn "unexpected empty list"` as an autofix.
  - id: list-hd
    severity: WARNING
    languages: [ocaml]
    message: >-
      Don't use 'List.hd' because it raises an unhelpful exception if its
      argument is an empty list. Prefer a match-with, possibly combined with
      'assert false' e.g. 'match xs with [] -> assert false | x :: _ -> ...'.
    pattern: 'List.hd'
    fix: 'Common.hd_exn "unexpected empty list"'

  # Reports references to `List.tl` and offers
  # `Common.tl_exn "unexpected empty list"` as an autofix.
  - id: list-tl
    severity: WARNING
    languages: [ocaml]
    message: >-
      Don't use 'List.tl' because it raises an unhelpful exception if its
      argument is an empty list. Prefer a match-with, possibly combined with
      'assert false' e.g. 'match xs with [] -> assert false | _ :: xs -> ...'.
    pattern: 'List.tl'
    fix: 'Common.tl_exn "unexpected empty list"'

  # Reports references to `Metavariable.<name>` where <name> matches the
  # regex `equal_.*`, with the exception of `Metavariable.equal_mvar`.
  # The autofix rewrites the reference to `Metavariable.Structural.<name>`.
  - id: no-metavariable-equal-xyz
    severity: WARNING
    languages: [ocaml]
    message: >-
      Don't use 'Metavariable.$F' directly, see 'AST_utils'. Instead use
      either 'Metavariable.Structural.$F' or 'Metavariable.Referential.$F'.
      You can also consider replacing `equal_mvalue` with
      'Matching_generic.equal_ast_bound_code'.
    patterns:
      - pattern: 'Metavariable.$F'
      - pattern-not: 'Metavariable.equal_mvar'
      - metavariable-regex:
          metavariable: $F
          regex: 'equal_.*'
    fix: 'Metavariable.Structural.$F'

  # Reports text of the form `<x>.id_svalue := Some <e>` (matched by regex,
  # with <x> and <e> captured as $1 and $2) unless it appears inside an
  # `if no_cycles_in_svalue ... then ...` guard. The autofix rewrites the
  # match to `Dataflow_svalue.set_svalue_ref $1 $2`.
  - id: no-direct-assign-to-id_svalue
    patterns:
      - pattern-not-inside: |
          if no_cycles_in_svalue ... then
            ...
      # FIXME: This doesn't work: $X.id_svalue := Some $E
      # Both capture groups accept every digit (`0-9`), so identifiers such
      # as `x6` or `tmp_9` are matched as well.
      # NOTE(review): the leading `\s` is part of the match, so the autofix
      # presumably replaces that whitespace character too - confirm.
      - pattern-regex: \s([a-z0-9_]+)\.id_svalue\s+:=\s+Some\s+([a-z0-9_]+)
    message: >-
      Do not set '$1.id_svalue' directly, instead use
      'Dataflow_svalue.set_svalue_ref' which performs a cycle check.
    fix: Dataflow_svalue.set_svalue_ref $1 $2
    languages: [ocaml]
    severity: WARNING