From fa9df0a7bd9d604d03c14dafe64654594dcb22e4 Mon Sep 17 00:00:00 2001
From: Wolf Vollprecht
Date: Thu, 25 Jul 2024 03:13:45 -0700
Subject: [PATCH] Implement string contains for lpython (#2787)

---
 integration_tests/CMakeLists.txt         |  1 +
 integration_tests/test_str_06.py         | 11 ++++++++
 src/libasr/codegen/asr_to_c_cpp.h        | 11 ++++++++
 src/libasr/codegen/asr_to_llvm.cpp       | 35 ++++++++++++++++++++++++
 src/libasr/runtime/lfortran_intrinsics.c |  6 ++++
 src/libasr/runtime/lfortran_intrinsics.h |  1 +
 6 files changed, 65 insertions(+)
 create mode 100644 integration_tests/test_str_06.py

diff --git a/integration_tests/CMakeLists.txt b/integration_tests/CMakeLists.txt
index f397765d18..49f5a255ae 100644
--- a/integration_tests/CMakeLists.txt
+++ b/integration_tests/CMakeLists.txt
@@ -532,6 +532,7 @@ RUN(NAME test_str_02 LABELS cpython llvm llvm_jit c)
 RUN(NAME test_str_03 LABELS cpython llvm llvm_jit c)
 RUN(NAME test_str_04 LABELS cpython llvm llvm_jit c wasm)
 RUN(NAME test_str_05 LABELS cpython llvm llvm_jit c)
+RUN(NAME test_str_06 LABELS cpython llvm llvm_jit c)
 RUN(NAME test_list_01 LABELS cpython llvm llvm_jit c)
 RUN(NAME test_list_02 LABELS cpython llvm llvm_jit c)
 RUN(NAME test_list_03 LABELS cpython llvm llvm_jit c NOFAST)
diff --git a/integration_tests/test_str_06.py b/integration_tests/test_str_06.py
new file mode 100644
index 0000000000..8df130521d
--- /dev/null
+++ b/integration_tests/test_str_06.py
@@ -0,0 +1,11 @@
+def main0():
+    x: str
+    x = "Hello, World"
+
+    assert "Hello" in x
+    assert "," in x
+    assert "rld" in x
+
+    assert "Hello" not in "World"
+
+main0()
diff --git a/src/libasr/codegen/asr_to_c_cpp.h b/src/libasr/codegen/asr_to_c_cpp.h
index c0404b70f9..22762b9c27 100644
--- a/src/libasr/codegen/asr_to_c_cpp.h
+++ b/src/libasr/codegen/asr_to_c_cpp.h
@@ -1244,6 +1244,17 @@ PyMODINIT_FUNC PyInit_lpython_module_)" + fn_name + R"((void) {
         src = "_lfortran_strrepeat_c(" + s + ", " + n + ")";
     }

+    // Emits a call to the runtime helper `_lfortran_str_contains(str, substr)`
+    // for `left in right`: `m_left` is the substring, `m_right` the searched string.
+    void visit_StringContains(const ASR::StringContains_t &x) {
+        CHECK_FAST_C_CPP(compiler_options, x)
+        self().visit_expr(*x.m_left);
+        std::string substr = src;
+        self().visit_expr(*x.m_right);
+        std::string str = src;
+        src = "_lfortran_str_contains(" + str + ", " + substr + ")";
+    }
+
     void visit_Assignment(const ASR::Assignment_t &x) {
         std::string target;
         ASR::ttype_t* m_target_type = ASRUtils::expr_type(x.m_target);
diff --git a/src/libasr/codegen/asr_to_llvm.cpp b/src/libasr/codegen/asr_to_llvm.cpp
index 2e923e9f8e..5f94cfd379 100644
--- a/src/libasr/codegen/asr_to_llvm.cpp
+++ b/src/libasr/codegen/asr_to_llvm.cpp
@@ -723,6 +723,23 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor
         return builder->CreateCall(fn, {str, idx1});
     }

+    // Declares (on first use) and calls the runtime function
+    // `_lfortran_str_contains(str, substr)`; the call yields an i1 value.
+    llvm::Value* lfortran_str_contains(llvm::Value* str, llvm::Value* substr)
+    {
+        std::string runtime_func_name = "_lfortran_str_contains";
+        llvm::Function *fn = module->getFunction(runtime_func_name);
+        if (!fn) {
+            llvm::FunctionType *function_type = llvm::FunctionType::get(
+                    llvm::Type::getInt1Ty(context), {
+                        character_type, character_type
+                    }, false);
+            fn = llvm::Function::Create(function_type,
+                    llvm::Function::ExternalLinkage, runtime_func_name, *module);
+        }
+        return builder->CreateCall(fn, {str, substr});
+    }
+
     llvm::Value* lfortran_str_copy(llvm::Value* str, llvm::Value* idx1, llvm::Value* idx2)
     {
         std::string runtime_func_name = "_lfortran_str_copy";
@@ -6416,6 +6433,24 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor
         }
     }

+    // Lowers `left in right` to a call of `_lfortran_str_contains(right, left)`.
+    void visit_StringContains(const ASR::StringContains_t& x) {
+        if (x.m_value) {
+            this->visit_expr_wrapper(x.m_value, true);
+            return;
+        }
+
+        this->visit_expr_wrapper(x.m_left, true);
+        llvm::Value *substr = tmp;
+
+        this->visit_expr_wrapper(x.m_right, true);
+        llvm::Value *right = tmp;
+
+        // The result is an i1 flag, not a string pointer, so it is not
+        // pushed onto `strings_to_be_deallocated`.
+        tmp = lfortran_str_contains(right, substr);
+    }
+
     void visit_StringSection(const ASR::StringSection_t& x) {
         if (x.m_value) {
             this->visit_expr_wrapper(x.m_value, true);
diff --git a/src/libasr/runtime/lfortran_intrinsics.c b/src/libasr/runtime/lfortran_intrinsics.c
index 7c09965c09..8bcbe893cb 100644
--- a/src/libasr/runtime/lfortran_intrinsics.c
+++ b/src/libasr/runtime/lfortran_intrinsics.c
@@ -2197,6 +2197,12 @@ LFORTRAN_API char* _lfortran_str_item(char* s, int32_t idx) {
     return res;
 }

+/// Returns true when `substr` occurs anywhere in `str` (both NUL-terminated).
+LFORTRAN_API bool _lfortran_str_contains(char* str, char* substr) {
+    char* res = strstr(str, substr);
+    return res != NULL;
+}
+
 // idx1 and idx2 both start from 1
 LFORTRAN_API char* _lfortran_str_copy(char* s, int32_t idx1, int32_t idx2) {

diff --git a/src/libasr/runtime/lfortran_intrinsics.h b/src/libasr/runtime/lfortran_intrinsics.h
index 7215573fde..e857171da5 100644
--- a/src/libasr/runtime/lfortran_intrinsics.h
+++ b/src/libasr/runtime/lfortran_intrinsics.h
@@ -209,6 +209,7 @@ LFORTRAN_API void _lfortran_free(char* ptr);
 LFORTRAN_API void _lfortran_alloc(char** ptr, int32_t len);
 LFORTRAN_API void _lfortran_string_init(int size_plus_one, char *s);
 LFORTRAN_API char* _lfortran_str_item(char* s, int32_t idx);
+LFORTRAN_API bool _lfortran_str_contains(char* str, char* substr);
 LFORTRAN_API char* _lfortran_str_copy(char* s, int32_t idx1, int32_t idx2); // idx1 and idx2 both start from 1
 LFORTRAN_API char* _lfortran_str_slice(char* s, int32_t idx1, int32_t idx2, int32_t step,
                         bool idx1_present, bool idx2_present);