From e4c03d83b5c2d8cd90cefe8f49d9ab5dcdee1070 Mon Sep 17 00:00:00 2001 From: wuwenyi <773323518@qq.com> Date: Fri, 10 May 2024 00:14:25 +0800 Subject: [PATCH 1/2] add fail_script in conditon --- crawlipt/annotation.py | 2 +- crawlipt/script.py | 80 +++++++++++++++++++++++++++++++----------- test.py | 11 +++--- 3 files changed, 65 insertions(+), 28 deletions(-) diff --git a/crawlipt/annotation.py b/crawlipt/annotation.py index 6a8f8d0..2bbc3c7 100644 --- a/crawlipt/annotation.py +++ b/crawlipt/annotation.py @@ -42,7 +42,7 @@ def inner_wrapper(*args, **kwargs): if type_.annotation == type_.empty: raise ParamTypeError(f"Parameter {name} must be indicated the type.") if name not in all_kwargs: - raise ParamTypeError(f"Parameter {name} is not in the defined parameter list.") + raise ParamTypeError(f"Parameter {name} is in the defined parameter list, but missing.") if all_kwargs[name] is None and type_.default is not type_.empty: continue if all_kwargs[name] == "__PRE_RETURN__": diff --git a/crawlipt/script.py b/crawlipt/script.py index 444b474..dbdfb36 100644 --- a/crawlipt/script.py +++ b/crawlipt/script.py @@ -65,7 +65,7 @@ def get_dict(obj): class ScriptProcess: ACTIONS = get_dict(Action) CONDITIONS = get_dict(Condition) - __POP_KEY = {"method", "next", "if", "check", "condition", "loop", "return_flag", "while"} + __POP_KEY = {"method", "next", "if", "check", "condition", "loop", "return_flag", "while", "fail_script"} @staticmethod @check @@ -78,6 +78,11 @@ def __condition_check(temp_condition: dict, name: str, pre_deep: str, current_de if condition not in ScriptProcess.CONDITIONS.keys(): msg = "(Deep %s) Could not found the Condition Method in '%s'" % (pre_deep + str(current_deep), name) raise ScriptSyntaxError(ParamTypeError(msg), condition, pre_deep + str(current_deep)) + fail_script = temp_condition.get("fail_script") + if fail_script: + ScriptProcess.syntax_check(script=fail_script, + pre_deep=pre_deep + str(current_deep) + "->", + return_record=return_record) return_flag = temp_condition.get("return_flag") if return_flag and not isinstance(return_flag, str): msg = "(Deep %s) return_flag must be the type of str" % (pre_deep + str(current_deep)) @@ -85,6 +90,8 @@ def __condition_check(temp_condition: dict, name: str, pre_deep: str, current_de for key, value in temp_condition.items(): if key.lower() not in ScriptProcess.__POP_KEY: temp_args[key] = value + if "store" in signature(ScriptProcess.CONDITIONS[condition]).parameters: + temp_args["store"] = None if return_record: for key, value in temp_args.items(): if isinstance(value, str) and value.startswith("__rf-") and value.endswith("__"): @@ -141,8 +148,6 @@ def syntax_check(script: dict | str, pre_deep: str = "", return_record: dict = { ScriptProcess.syntax_check(script=loop_script, pre_deep=pre_deep + str(current_deep) + "->", return_record=return_record) - script = script.get("next") - continue check_condition = script.get("check") if check_condition: ScriptProcess.__condition_check(temp_condition=check_condition, @@ -152,6 +157,9 @@ def syntax_check(script: dict | str, pre_deep: str = "", return_record: dict = { current_deep=current_deep) if_condition = script.get("if") if if_condition: + if not script.get("method"): + msg = "(Deep %s) The 'if' condition must be with a Action Method" % (pre_deep + str(current_deep)) + raise ScriptSyntaxError(ParamTypeError(msg), "", pre_deep + str(current_deep)) ScriptProcess.__condition_check(temp_condition=if_condition, name="if", return_record=return_record, @@ -160,8 +168,8 @@ def syntax_check(script: dict | str, pre_deep: str = "", return_record: dict = { temp_args = {"driver": None} method = script.get("method") if not method: - msg = "(Deep %s) Method is missing" % (pre_deep + str(current_deep)) - raise ScriptSyntaxError(ParamTypeError(msg), "", pre_deep + str(current_deep)) + script = script.get("next") + continue if method not in ScriptProcess.ACTIONS.keys(): msg = "(Deep %s) Could not found the Action Method" % (pre_deep + str(current_deep)) raise ScriptSyntaxError(ParamTypeError(msg), method, pre_deep + str(current_deep)) @@ -210,13 +218,16 @@ def syntax_check(script: dict | str, pre_deep: str = "", return_record: dict = { @staticmethod @check - def __process_condition(temp_condition: dict, webdriver: WebDriver, return_record: dict) -> bool: + def __process_condition(temp_condition: dict, webdriver: WebDriver, return_record: dict, + global_script: dict, interval: float, wait: float, store: StoreBase = None) -> bool: condition = temp_condition.get("condition") return_flag = temp_condition.get("return_flag") temp_args = {"driver": webdriver} for key, value in temp_condition.items(): if key.lower() not in ScriptProcess.__POP_KEY: temp_args[key] = value + if "store" in signature(ScriptProcess.CONDITIONS[condition]).parameters: + temp_args["store"] = store if return_record: for key, value in temp_args.items(): if isinstance(value, str) and value.startswith("__rf-") and value.endswith("__"): @@ -224,6 +235,15 @@ def __process_condition(temp_condition: dict, webdriver: WebDriver, return_recor is_success = ScriptProcess.CONDITIONS[condition](**temp_args) if return_flag: return_record[return_flag] = is_success + if not is_success: + fail_script = temp_condition.get("fail_script") + if fail_script: + ScriptProcess._process_script(script=fail_script, + global_script=global_script, + webdriver=webdriver, + interval=interval, + wait=wait, + store=store,) return is_success @staticmethod @@ -244,7 +264,11 @@ def _process_script(script: dict, global_script: dict, webdriver: WebDriver, sto if while_condition and cnt: while ScriptProcess.__process_condition(temp_condition=while_condition, return_record=return_record, - webdriver=webdriver) and cnt: + global_script=global_script, + webdriver=webdriver, + store=store, + interval=interval, + wait=wait) and cnt: ScriptProcess._process_script(script=loop_script, global_script=global_script, webdriver=webdriver, @@ -253,12 +277,14 @@ def _process_script(script: dict, global_script: dict, webdriver: WebDriver, sto interval=interval, wait=wait) cnt -= 1 - script = script.get("next") - continue if while_condition: while ScriptProcess.__process_condition(temp_condition=while_condition, return_record=return_record, - webdriver=webdriver): + global_script=global_script, + webdriver=webdriver, + store=store, + interval=interval, + wait=wait): ScriptProcess._process_script(script=loop_script, global_script=global_script, webdriver=webdriver, @@ -266,8 +292,6 @@ def _process_script(script: dict, global_script: dict, webdriver: WebDriver, sto store=store, interval=interval, wait=wait) - script = script.get("next") - continue if cnt: for _ in range(cnt): ScriptProcess._process_script(script=loop_script, @@ -277,25 +301,34 @@ def _process_script(script: dict, global_script: dict, webdriver: WebDriver, sto store=store, interval=interval, wait=wait) - script = script.get("next") - continue check_condition = script.get("check") if check_condition: is_success = ScriptProcess.__process_condition(temp_condition=check_condition, return_record=return_record, - webdriver=webdriver) + global_script=global_script, + webdriver=webdriver, + store=store, + interval=interval, + wait=wait) if not is_success: return if_condition = script.get("if") if if_condition: is_success = ScriptProcess.__process_condition(temp_condition=if_condition, return_record=return_record, - webdriver=webdriver) + global_script=global_script, + webdriver=webdriver, + store=store, + interval=interval, + wait=wait) if not is_success: script = script.get("next") continue temp_args = {"driver": webdriver} method = script.get("method") + if not method: + script = script.get("next") + continue for key, value in script.items(): if key.lower() not in ScriptProcess.__POP_KEY: temp_args[key] = value @@ -345,7 +378,7 @@ def _replace_variable(script: dict, variable: VariableBase) -> None: raise VariableError(msg) script[key] = variable.get(variable_name) for key in script.keys(): - if key in {"loop", "if", "check", "while", "script"}: + if key in {"loop", "if", "check", "while", "script", "fail_script"}: ScriptProcess._replace_variable(script=script[key], variable=variable) script = script.get("next") @@ -362,10 +395,15 @@ def generate(scripts: list | dict | str) -> dict: res = {} temp = res for i in range(len(scripts)): - if "loop" in scripts[i].keys(): - loop_temp = scripts[i]["loop"] - if "script" in loop_temp: - loop_temp["script"] = ScriptProcess.generate(loop_temp["script"]) + for k in scripts[i].keys(): + if k == "loop": + loop_temp = scripts[i]["loop"] + if "script" in loop_temp: + loop_temp["script"] = ScriptProcess.generate(loop_temp["script"]) + if k in {"if", "check", "while"}: + judge_temp = scripts[i][k] + if "fail_script" in judge_temp: + judge_temp["fail_script"] = ScriptProcess.generate(judge_temp["fail_script"]) temp.update(scripts[i]) if i != len(scripts) - 1: while temp.get("next"): diff --git a/test.py b/test.py index c54d0ea..d80ba57 100644 --- a/test.py +++ b/test.py @@ -262,6 +262,9 @@ def checkNum(driver: WebDriver, xpath: str) -> bool: "method": "redirect", "url": "https://www.bchrt.com/tools/click-counter/", }, { + "method": "getAttribute", + "xpath": "//*[@id=\"count\"]", + "name": "value", "loop": { "while": { "condition": "checkNum", @@ -282,10 +285,6 @@ def checkNum(driver: WebDriver, xpath: str) -> bool: } ] } - }, { - "method": "getAttribute", - "xpath": "//*[@id=\"count\"]", - "name": "value" }] json_str = cpt.Script.generate_json(step) res = cpt.Script(json_str)(webdriver) @@ -293,7 +292,7 @@ def checkNum(driver: WebDriver, xpath: str) -> bool: webdriver.quit() def test_conditions(self): - webdriver = get_driver() + # webdriver = get_driver() step = [{ "method": "redirect", "url": "https://www.baidu.com/", @@ -301,7 +300,7 @@ def test_conditions(self): "method": "input", "xpath": "//*[@id=\"kw\"]", "text": "your search text", - "if": { + "check": { "condition": "presence", "xpath": "//*[@id=\"su\"]" } From 53d8662d4cee75bc3f750af666ef211eed14b9fb Mon Sep 17 00:00:00 2001 From: wuwenyi <773323518@qq.com> Date: Fri, 10 May 2024 00:15:53 +0800 Subject: [PATCH 2/2] version 0.1.2 --- crawlipt/__version__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crawlipt/__version__.py b/crawlipt/__version__.py index 38d154e..9f34a28 100644 --- a/crawlipt/__version__.py +++ b/crawlipt/__version__.py @@ -3,6 +3,6 @@ # 88YbdP88 8P 88""" dP__Yb Yb 88"Yb dP__Yb Yb "88 88"" # 88 YY 88 dP 88 dP""""Yb YboodP 88 Yb dP""""Yb YboodP 888888 -VERSION = (0, 1, 1) +VERSION = (0, 1, 2) __version__ = '.'.join(map(str, VERSION)) diff --git a/setup.py b/setup.py index ccb7e6e..21fd1ed 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ EMAIL = 'wwy20001014@foxmail.com' AUTHOR = 'WwwwwyDev' REQUIRES_PYTHON = '>=3.10.0' -VERSION = '0.1.1' +VERSION = '0.1.2' # What packages are required for this module to be executed? REQUIRED = [