Skip to content

Commit

Permalink
Merge pull request #6 from WwwwwyDev/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
WwwwwyDev authored May 6, 2024
2 parents f854f94 + 272bb77 commit 2b1243a
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 16 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,14 @@ The script for selenium in python
You can use Crawlipt to drive Selenium from Python with scripts. The scripts use JSON format for better cross-language operation and physical storage.

## installing
You can use pip or pip3 to install the crawlist\
You can use pip or pip3 to install the crawlipt

`pip install crawlipt` or `pip3 install crawlipt`

If you have already installed crawlipt, you may need to update to the latest version:

`pip install --upgrade crawlipt`

## quickly start
```python
from webdriver_manager.chrome import ChromeDriverManager
Expand Down
3 changes: 2 additions & 1 deletion crawlipt/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .script import Script
from .action import Action
from .annotation import check, alias
from __version__ import __version__ as version
from .pojo import Variable, VariableBase
from .__version__ import __version__ as version
2 changes: 1 addition & 1 deletion crawlipt/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
# 88YbdP88 8P 88""" dP__Yb Yb 88"Yb dP__Yb Yb "88 88""
# 88 YY 88 dP 88 dP""""Yb YboodP 88 Yb dP""""Yb YboodP 888888

VERSION = (0, 0, 4)
VERSION = (0, 0, 5)

__version__ = '.'.join(map(str, VERSION))
1 change: 1 addition & 0 deletions crawlipt/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from .slide import Slide
from .select import Select
from .get import Get
from .window import Window
45 changes: 45 additions & 0 deletions crawlipt/actions/window.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from selenium.common import NoSuchWindowException
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver

from crawlipt.annotation import check, alias


class Window:
    """Browser window and history actions operating on a selenium webdriver."""

    @staticmethod
    @check(exclude="driver")
    def clear(driver: WebDriver) -> None:
        """
        Close every window except the first one and load a blank page in it.
        :param driver: selenium webdriver
        """
        handles = driver.window_handles
        for handle in handles[1:]:
            # close() only affects the focused window and leaves the driver
            # without a valid focus afterwards, so focus each window first.
            driver.switch_to.window(handle)
            driver.close()
        if handles:
            # Return focus to the surviving window before navigating.
            driver.switch_to.window(handles[0])
        driver.get("data:,")

    @staticmethod
    @check(exclude="driver")
    def back(driver: WebDriver) -> None:
        """
        Goes one step backward in the browser history.
        :param driver: selenium webdriver
        """
        driver.back()

    @staticmethod
    @check(exclude="driver")
    def forword(driver: WebDriver) -> None:
        """
        Goes one step forward in the browser history.
        The misspelled method name is kept so existing callers keep working.
        :param driver: selenium webdriver
        """
        driver.forward()

    @staticmethod
    @check(exclude="driver")
    def close(driver: WebDriver) -> None:
        """
        Closes the current window.
        :param driver: selenium webdriver
        """
        driver.close()
2 changes: 2 additions & 0 deletions crawlipt/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def inner_wrapper(*args, **kwargs):
continue
if all_kwargs[name] == "__PRE_RETURN__":
continue
if isinstance(all_kwargs[name], str) and all_kwargs[name].startswith("__v-") and all_kwargs[name].endswith("__"):
continue
if isinstance(all_kwargs[name], int) and type_.annotation is float:
all_kwargs[name] = float(all_kwargs.get(name))
if not isinstance(all_kwargs[name], type_.annotation):
Expand Down
47 changes: 47 additions & 0 deletions crawlipt/pojo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import json
from typing import Any

from crawlipt.annotation import check


class VariableBase:
    """
    Interface for a variable store.
    Every method here raises NotImplementedError until a subclass overrides it.
    """

    @check
    def get(self, key: str) -> Any:
        """
        Look up the value stored under a name.
        :param key: variable name
        """
        raise NotImplementedError()

    @check(exclude="value")
    def set(self, key: str, value: Any) -> Any:
        """
        Store a value under a name.
        :param key: variable name
        :param value: value to store (excluded from the argument check)
        """
        raise NotImplementedError()

    @check
    def __contains__(self, key: str):
        """
        Tell whether a name is defined (backs the ``in`` operator).
        :param key: variable name
        """
        raise NotImplementedError()


class Variable(VariableBase):
    """Variable store backed by a plain dictionary."""

    @check
    def __init__(self, values: dict | str):
        """
        :param values: a dict of variable names to values, or a JSON string
                       that is decoded and used as that dict
        """
        if isinstance(values, str):
            # json.loads parses text; json.load expects a file object with
            # .read() and fails with AttributeError when handed a str.
            values = json.loads(values)
        self.values = values

    @check
    def get(self, key: str) -> Any:
        """
        Return the value stored under a name, or None when it is absent.
        :param key: variable name
        """
        return self.values.get(key)

    @check(exclude="value")
    def set(self, key: str, value: Any):
        """
        Store a value under a name, replacing any previous value.
        :param key: variable name
        :param value: value to store (excluded from the argument check)
        """
        self.values[key] = value

    @check
    def __contains__(self, key: str):
        """
        Tell whether a name is defined (backs the ``in`` operator).
        :param key: variable name
        """
        return key in self.values


class VariableError(Exception):
    """Error carrying a message about a problem with script variables."""

    def __init__(self, message):
        """
        :param message: text returned when the error is converted to str
        """
        self.message = message

    def __str__(self):
        """Return the stored message unchanged."""
        text = self.message
        return text
45 changes: 38 additions & 7 deletions crawlipt/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from selenium.webdriver.support.wait import WebDriverWait
import copy

from crawlipt.pojo import VariableError, VariableBase


class ScriptError(Exception):
def __init__(self, e: Exception, method: str, deep: str):
Expand All @@ -34,7 +36,6 @@ def __str__(self):
error = "[" + self.e.__class__.__name__ + "] " + self.e.__str__() + "\n"
return "(Deep:%s method:%s) arguments is wrong\n" % (self.deep, self.method) + error + info + params + doc


def dfs_search(obj):
for parent in obj.__bases__:
if parent == object:
Expand All @@ -61,7 +62,7 @@ def get_dict(obj):
class ScriptProcess:
ACTIONS = get_dict(Action)
CONDITIONS = get_dict(Condition)
__POP_KEY = {"method", "next", "if", "check", "condition"}
__POP_KEY = {"method", "next", "if", "check", "condition", "loop"}

@staticmethod
@check
Expand Down Expand Up @@ -113,6 +114,9 @@ def syntax_check(script: dict | str, pre_deep: str = "") -> None:
pre_deep=pre_deep,
current_deep=current_deep)
loop_script = loop_temp.get("script")
if not loop_script:
msg = "(Deep %s) loop must set the param of script" % (pre_deep + str(current_deep))
raise ScriptError(ParamTypeError(msg), "", pre_deep + str(current_deep))
ScriptProcess.syntax_check(loop_script, pre_deep=pre_deep + str(current_deep) + "->")
script = script.get("next")
continue
Expand Down Expand Up @@ -185,7 +189,8 @@ def _process_script(script: dict, global_script: dict, webdriver: WebDriver, int
while_condition = loop_temp.get("while")
loop_script = loop_temp.get("script")
if while_condition and cnt:
while ScriptProcess.__process_condition(temp_condition=while_condition, webdriver=webdriver) and cnt:
while ScriptProcess.__process_condition(temp_condition=while_condition,
webdriver=webdriver) and cnt:
ScriptProcess._process_script(script=loop_script,
global_script=global_script,
webdriver=webdriver,
Expand Down Expand Up @@ -250,6 +255,22 @@ def _process_script(script: dict, global_script: dict, webdriver: WebDriver, int
time.sleep(random.uniform(interval / 2, interval))
return pre_return

@staticmethod
@check
def _replace_variable(script: dict, variable: VariableBase) -> None:
    """
    Substitute ``__v-<name>__`` placeholders in a script, in place.
    Walks the chain of nodes linked through "next"; in each node, string
    values of the form ``__v-<name>__`` (under keys outside the reserved key
    set) are replaced by ``variable.get(<name>)``, and nested dict values are
    processed recursively.
    :param script: first node of the script chain; it is mutated
    :param variable: store queried with ``in`` and ``get``
    :raises VariableError: when a placeholder names an undefined variable
    """
    while script:
        # Only existing keys are reassigned, so the dict never changes size
        # while it is being iterated.
        for key, value in script.items():
            if key in ScriptProcess.__POP_KEY or not isinstance(value, str):
                continue
            if not (value.startswith("__v-") and value.endswith("__")):
                continue
            variable_name = value[4:-2]
            if variable_name not in variable:
                msg = f"The {variable_name} is not defined."
                raise VariableError(msg)
            script[key] = variable.get(variable_name)
        for key, value in script.items():
            # "next" is followed by the enclosing while loop. Recursing into
            # it as well would visit every later node about 2**depth times
            # and could substitute an already substituted value again.
            if key != "next" and isinstance(value, dict):
                ScriptProcess._replace_variable(script=value, variable=variable)
        script = script.get("next")

@staticmethod
@check
def generate(scripts: list | dict | str) -> dict:
Expand Down Expand Up @@ -323,6 +344,8 @@ def add_condition(func: callable) -> None:
ScriptProcess.CONDITIONS[func.__crawlipt_func_name__] = func_bak




class Script(ScriptProcess):

@check
Expand All @@ -331,7 +354,7 @@ def __init__(self, script: dict | str | list, global_script: dict | str | list =
"""
Script Parser
:param script: Need a str of json or dict or list steps that conforms to syntax conventions
:param b: This script will be executed before every actions
:param global_script: This script will be executed before every actions
:param interval: The direct interval between two consecutive scripts
:param wait: The longest wait time before presence of element located
:param is_need_syntax_check: Whether the script need a syntax check
Expand All @@ -351,15 +374,23 @@ def __init__(self, script: dict | str | list, global_script: dict | str | list =
self.wait = wait

@check
def process(self, webdriver: WebDriver) -> Any:
def process(self, webdriver: WebDriver, variable: VariableBase = None) -> Any:
"""
process the script
"""
if self.is_need_syntax_check:
ScriptProcess.syntax_check(self.script)
ScriptProcess.syntax_check(self.global_script)
return ScriptProcess._process_script(script=self.script,
global_script=self.global_script,
script = copy.deepcopy(self.script)
global_script = copy.deepcopy(self.global_script)
if variable:
ScriptProcess._replace_variable(script, variable)
ScriptProcess._replace_variable(global_script, variable)
if self.is_need_syntax_check:
ScriptProcess.syntax_check(script)
ScriptProcess.syntax_check(global_script)
return ScriptProcess._process_script(script=script,
global_script=global_script,
webdriver=webdriver,
interval=self.interval,
wait=self.wait)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
EMAIL = 'wwy20001014@foxmail.com'
AUTHOR = 'WwwwwyDev'
REQUIRES_PYTHON = '>=3.10.0'
VERSION = '0.0.4'
VERSION = '0.0.5'

# What packages are required for this module to be executed?
REQUIRED = [
Expand Down
48 changes: 43 additions & 5 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import crawlipt as cpt
import ddddocr as docr


def get_driver(is_headless=False, is_eager=False):
option = wd.ChromeOptions()
arguments = [
Expand Down Expand Up @@ -147,7 +148,7 @@ def test_04(self):
"method": "getInnerText",
"xpath": "//*[@id=\"trans-selection\"]/div/span",
}]
result = cpt.Script(step, interval=0.1)(webdriver)
result = cpt.Script(step, interval=0)(webdriver)
print(result)
webdriver.quit()

Expand All @@ -170,7 +171,7 @@ def test05(self):
webdriver.quit()

def test06(self):
# webdriver = get_driver()
webdriver = get_driver()
class A:
@staticmethod
@cpt.check(exclude="driver")
Expand Down Expand Up @@ -210,9 +211,9 @@ def crackCaptcha(driver: WebDriver, xpath: str) -> str:
"method": "click",
"xpath": "//*[@id=\"dosubmit\"]",
}]
result = cpt.Script(step, interval=1)
# print(result)
# webdriver.quit()
result = cpt.Script(step, interval=1)(webdriver)
print(result)
webdriver.quit()

def test07(self):
webdriver = get_driver()
Expand Down Expand Up @@ -309,6 +310,43 @@ def test_conditions(self):
}]
cpt.Script(step, interval=3)

def test_variable(self):
    """Run one script three times, each time with a different variable set."""
    webdriver = get_driver()
    step = [
        {
            "method": "redirect",
            "url": "https://www.baidu.com/",
        },
        {
            "method": "input",
            "xpath": "//*[@id=\"kw\"]",
            "text": "__v-searchKey__",
            "if": {
                "condition": "presence",
                "xpath": "__v-button_xpath__"
            }
        },
        {
            "method": "clear"
        },
    ]
    # The last binding points the condition at a different xpath
    # (presumably an element that does not exist on the page).
    bindings = [
        cpt.Variable({
            "searchKey": "hello",
            "button_xpath": "//*[@id=\"su\"]"
        }),
        cpt.Variable({
            "searchKey": "world",
            "button_xpath": "//*[@id=\"su\"]"
        }),
        cpt.Variable({
            "searchKey": "world",
            "button_xpath": "//*[@id=\"su_no_existence\"]"
        }),
    ]
    loader = cpt.Script(step, interval=3)
    for binding in bindings:
        loader.process(webdriver=webdriver, variable=binding)
    webdriver.quit()


if __name__ == '__main__':
unittest.main()

0 comments on commit 2b1243a

Please sign in to comment.