Commit

refactor code
tikazyq committed Jun 6, 2022
1 parent 5ff35b5 commit dc260f5
Showing 22 changed files with 154 additions and 128 deletions.
24 changes: 0 additions & 24 deletions _ext/python/cli/actions/login.py

This file was deleted.

File renamed without changes.
24 changes: 24 additions & 0 deletions _ext/python/crawlab/actions/login.py
@@ -0,0 +1,24 @@
from crawlab.client import http_post
from crawlab.config.config import config
from crawlab.constants.upload import CLI_DEFAULT_CONFIG_KEY_USERNAME, CLI_DEFAULT_CONFIG_KEY_PASSWORD, \
CLI_DEFAULT_CONFIG_KEY_API_ADDRESS, CLI_DEFAULT_CONFIG_KEY_TOKEN


def login(api_address: str, username: str, password: str):
url = f'{api_address}/login'
try:
res = http_post(url, {
'username': username,
'password': password,
})
print('logged-in successfully')
except Exception as e:
print(e)
return

token = res.json().get('data')
config.set(CLI_DEFAULT_CONFIG_KEY_USERNAME, username)
config.set(CLI_DEFAULT_CONFIG_KEY_PASSWORD, password)
config.set(CLI_DEFAULT_CONFIG_KEY_API_ADDRESS, api_address)
config.set(CLI_DEFAULT_CONFIG_KEY_TOKEN, token)
config.save()
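
As a point of reference, here is a minimal sketch of invoking the relocated login action directly; the address and credentials below are illustrative assumptions, and a reachable Crawlab API is assumed:

from crawlab.actions.login import login

# Hypothetical values for illustration; requires a running Crawlab server.
login(api_address='http://localhost:8080/api', username='admin', password='admin')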
@@ -1,43 +1,84 @@
import os
import re
from argparse import Namespace

from print_color import print as print_color

from cli.client.request import http_put, http_post
from cli.constants import CLI_DEFAULT_UPLOAD_IGNORE_PATTERNS, CLI_DEFAULT_UPLOAD_SPIDER_MODE, \
CLI_DEFAULT_UPLOAD_SPIDER_CMD
from cli.errors import MissingIdException, HttpException
from crawlab.config.spider import get_spider_config
from crawlab.client import http_put, http_post
from crawlab.config import get_spider_config
from crawlab.constants.upload import CLI_DEFAULT_UPLOAD_SPIDER_MODE, CLI_DEFAULT_UPLOAD_SPIDER_CMD, \
CLI_DEFAULT_UPLOAD_IGNORE_PATTERNS
from crawlab.errors.upload import MissingIdException, HttpException


def upload(args: Namespace):
# spider id
_id = args.id
def create_spider(name: str, description: str = None, mode: str = None, priority: int = None, cmd: str = None,
param: str = None, col_name: str = None) -> str:
# results collection name
if col_name is None:
col_name = f'results_{"_".join(name.lower().split(" "))}'

# directory path
dir_ = args.dir
if dir_ is None:
dir_ = os.path.abspath('.')
# mode
if mode is None:
mode = CLI_DEFAULT_UPLOAD_SPIDER_MODE

# cmd
if cmd is None:
cmd = CLI_DEFAULT_UPLOAD_SPIDER_CMD

# http put
res = http_put(url='/spiders', data={
'name': name,
'description': description,
'mode': mode,
'priority': priority,
'cmd': cmd,
'param': param,
'col_name': col_name,
})

return res.json().get('data').get('_id')


def upload_file(_id: str, file_path: str, target_path: str):
if _id is None:
raise MissingIdException

with open(file_path, 'rb') as f:
data = {
'path': target_path,
}
files = {'file': f}

url = f'/spiders/{_id}/files/save'
http_post(url=url, data=data, files=files, headers={})


def upload_dir(dir_path: str, create: bool = True, spider_id: str = None, name=None, description=None, mode=None,
priority=None, cmd=None, param=None, col_name=None):
# spider config
cfg = get_spider_config(dir_)
cfg = get_spider_config(dir_path)

# variables
name = args.name if args.name is not None else cfg.name
description = args.description if args.description is not None else cfg.description
mode = args.mode if args.mode is not None else cfg.mode
priority = args.priority if args.priority is not None else cfg.priority
cmd = args.cmd if args.cmd is not None else cfg.cmd
param = args.param if args.param is not None else cfg.param
col_name = args.col_name if args.col_name is not None else cfg.col_name
if name is None:
name = cfg.name
if description is None:
description = cfg.description
if mode is None:
mode = cfg.mode
if priority is None:
priority = cfg.priority
if cmd is None:
cmd = cfg.cmd
if param is None:
param = cfg.param
if col_name is None:
col_name = cfg.col_name

# create spider
if args.create:
if create:
try:
_id = create_spider(name=name, description=description, mode=mode, priority=priority, cmd=cmd, param=param,
col_name=col_name)
print_color(f'created spider {name} (id: {_id})', tag='success', tag_color='green', color='white')
spider_id = create_spider(name=name, description=description, mode=mode, priority=priority, cmd=cmd,
param=param, col_name=col_name)
print_color(f'created spider {name} (id: {spider_id})', tag='success', tag_color='green', color='white')
except HttpException:
print_color(f'create spider {name} failed', tag='error', tag_color='red', color='white')
return
@@ -48,8 +89,8 @@ def upload(args: Namespace):
'error': 0,
}

# iterate files
for root, dirs, files in os.walk(dir_):
# iterate all files in the directory
for root, dirs, files in os.walk(dir_path):
for file_name in files:
# file path
file_path = os.path.join(root, file_name)
@@ -59,11 +100,11 @@ def upload(args: Namespace):
continue

# target path
target_path = file_path.replace(dir_, '')
target_path = file_path.replace(dir_path, '')

# upload file
try:
upload_file(_id, file_path, target_path)
upload_file(spider_id, file_path, target_path)
print_color(f'uploaded {file_path}', tag='success', tag_color='green', color='white')
stats['success'] += 1

@@ -77,48 +118,6 @@ def upload(args: Namespace):
print_color(f'failed: {stats["error"]}', tag='info', tag_color='cyan', color='white')


def create_spider(name: str, description: str = None, mode: str = None, priority: int = None, cmd: str = None,
param: str = None, col_name: str = None) -> str:
# results collection name
if col_name is None:
col_name = f'results_{"_".join(name.lower().split(" "))}'

# mode
if mode is None:
mode = CLI_DEFAULT_UPLOAD_SPIDER_MODE

# cmd
if cmd is None:
cmd = CLI_DEFAULT_UPLOAD_SPIDER_CMD

# http put
res = http_put(url='/spiders', data={
'name': name,
'description': description,
'mode': mode,
'priority': priority,
'cmd': cmd,
'param': param,
'col_name': col_name,
})

return res.json().get('data').get('_id')


def upload_file(_id: str, file_path: str, target_path: str):
if _id is None:
raise MissingIdException

with open(file_path, 'rb') as f:
data = {
'path': target_path,
}
files = {'file': f}

url = f'/spiders/{_id}/files/save'
http_post(url=url, data=data, files=files, headers={})


def is_ignored(file_path: str) -> bool:
for pat in CLI_DEFAULT_UPLOAD_IGNORE_PATTERNS:
if re.search(pat, file_path) is not None:
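
The refactor splits the old argparse-bound upload into reusable functions (create_spider, upload_file, upload_dir). A hedged sketch of calling upload_dir programmatically; all argument values here are illustrative:

from crawlab.actions.upload import upload_dir

# Hypothetical invocation: create a new spider from a local project directory.
# Options left as None fall back to the directory's spider config or CLI defaults.
upload_dir(
    dir_path='/path/to/my-spider',
    create=True,
    name='my-spider',
    cmd='python main.py',
)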
File renamed without changes.
File renamed without changes.
@@ -1,8 +1,8 @@
from cli.config import config
from cli.constants import CLI_DEFAULT_CONFIG_KEY_PASSWORD
from crawlab.config.config import config
from crawlab.constants.upload import CLI_DEFAULT_CONFIG_KEY_PASSWORD


def config_func(args):
def cli_config_func(args):
if args.set is not None:
k, v = args.set.split('=')
config.set(k, v)
5 changes: 5 additions & 0 deletions _ext/python/crawlab/cli/actions/login.py
@@ -0,0 +1,5 @@
from crawlab.actions.login import login


def cli_login(args):
login(api_address=args.api_address, username=args.username, password=args.password)
18 changes: 18 additions & 0 deletions _ext/python/crawlab/cli/actions/upload.py
@@ -0,0 +1,18 @@
import os
from argparse import Namespace

from crawlab.actions.upload import upload_dir


def cli_upload(args: Namespace):
# spider id
spider_id = args.id

# directory path
dir_path = args.dir
if dir_path is None:
dir_path = os.path.abspath('')

# upload directory
upload_dir(dir_path=dir_path, create=args.create, spider_id=spider_id, name=args.name, description=args.description,
mode=args.mode, priority=args.priority, cmd=args.cmd, param=args.param, col_name=args.col_name)
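
The CLI wrapper above simply unpacks the parsed flags and delegates to the action layer; a sketch of driving it with a hand-built Namespace (all values illustrative):

from argparse import Namespace

from crawlab.cli.actions.upload import cli_upload

# Hypothetical Namespace mirroring the upload parser's flags.
args = Namespace(id=None, dir=None, create=True, name='my-spider', description=None,
                 mode=None, priority=None, cmd=None, param=None, col_name=None)
cli_upload(args)  # dir=None falls back to the current working directory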
15 changes: 8 additions & 7 deletions _ext/python/cli/main.py → _ext/python/crawlab/cli/main.py
@@ -1,10 +1,11 @@
import argparse

from cli.actions.config import config_func
from cli.constants import CLI_ACTION_UPLOAD, CLI_ACTION_LOGIN, CLI_DEFAULT_API_ADDRESS, CLI_DEFAULT_API_USERNAME, \
from crawlab.constants.upload import CLI_ACTION_UPLOAD, CLI_ACTION_LOGIN, CLI_DEFAULT_API_ADDRESS, \
CLI_DEFAULT_API_USERNAME, \
CLI_DEFAULT_API_PASSWORD, CLI_ACTION_CONFIG
from cli.actions.login import login
from cli.actions.upload import upload
from crawlab.cli.actions.config import cli_config_func
from crawlab.cli.actions.login import cli_login
from crawlab.cli.actions.upload import cli_upload

# root parser
root_parser = argparse.ArgumentParser(description='CLI tool for Crawlab')
@@ -20,7 +21,7 @@
type=str)
login_parser.add_argument('--password', '-p', help='Password for logging in Crawlab', default=CLI_DEFAULT_API_PASSWORD,
type=str)
login_parser.set_defaults(func=login, action=CLI_ACTION_LOGIN)
login_parser.set_defaults(func=cli_login, action=CLI_ACTION_LOGIN)

# upload parser
upload_parser = subparsers.add_parser(CLI_ACTION_UPLOAD)
@@ -45,13 +46,13 @@
upload_parser.add_argument('--col_name', '-C',
help='Spider results collection name if creating a new spider. Default: results_<spider_name>',
type=str)
upload_parser.set_defaults(func=upload, action=CLI_ACTION_UPLOAD)
upload_parser.set_defaults(func=cli_upload, action=CLI_ACTION_UPLOAD)

# config parser
config_parser = subparsers.add_parser(CLI_ACTION_CONFIG)
config_parser.add_argument('--set', '-s', type=str)
config_parser.add_argument('--unset', '-u', type=str)
config_parser.set_defaults(func=config_func, action=CLI_ACTION_CONFIG)
config_parser.set_defaults(func=cli_config_func, action=CLI_ACTION_CONFIG)


def main():
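
With the cli_* wrappers registered via set_defaults, dispatch presumably works as in the sketch below; this assumes main() parses sys.argv with root_parser and calls the selected func, the usual argparse pattern (the body of main() is collapsed in this diff):

import sys

from crawlab.cli.main import main

# Illustrative credentials; equivalent to running the crawlab-cli console script.
sys.argv = ['crawlab-cli', 'login', '--username', 'admin', '--password', 'admin']
main()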
3 changes: 3 additions & 0 deletions _ext/python/crawlab/client/__init__.py
@@ -0,0 +1,3 @@
from .request import *
from .response import *
from .client import *
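
These star re-exports flatten the package surface, which is what lets the upload action above pull its helpers from crawlab.client directly:

# Enabled by the re-exports in crawlab/client/__init__.py:
from crawlab.client import http_put, http_post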
File renamed without changes.
@@ -1,8 +1,8 @@
import requests

from cli.config import config
from cli.constants import CLI_DEFAULT_CONFIG_KEY_API_ADDRESS, CLI_DEFAULT_CONFIG_KEY_TOKEN
from cli.errors import HttpException
from crawlab.config.config import config
from crawlab.constants.upload import CLI_DEFAULT_CONFIG_KEY_API_ADDRESS, CLI_DEFAULT_CONFIG_KEY_TOKEN
from crawlab.errors.upload import HttpException


def http_request(method: str, url: str, params: dict = None, data: dict = None, headers: dict = None,
@@ -50,17 +50,17 @@ def http_request(method: str, url: str, params: dict = None, data: dict = None,
raise HttpException(err_msg)


def http_get(url: str, params: dict, headers: dict = None, **kwargs):
def http_get(url: str, params: dict = None, headers: dict = None, **kwargs):
return http_request(method='GET', url=url, params=params, headers=headers, **kwargs)


def http_put(url: str, data: dict, headers: dict = None, **kwargs):
def http_put(url: str, data: dict = None, headers: dict = None, **kwargs):
return http_request(method='PUT', url=url, data=data, headers=headers, **kwargs)


def http_post(url: str, data: dict, headers: dict = None, **kwargs):
def http_post(url: str, data: dict = None, headers: dict = None, **kwargs):
return http_request(method='POST', url=url, data=data, headers=headers, **kwargs)


def http_delete(url: str, data: dict, headers: dict = None, **kwargs):
def http_delete(url: str, data: dict = None, headers: dict = None, **kwargs):
return http_request(method='DELETE', url=url, data=data, headers=headers, **kwargs)
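
Since params and data now default to None, read-only calls no longer need a placeholder payload. A minimal sketch, assuming the API address and token were saved to config by a prior login:

from crawlab.client import http_get

# A bare GET is now valid; previously the params argument was required.
res = http_get('/spiders')
print(res.json().get('data'))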
File renamed without changes.
@@ -2,7 +2,8 @@
import json
import os

from cli.constants import CLI_DEFAULT_CONFIG_ROOT_DIR, CLI_DEFAULT_CONFIG_CLI_DIR, CLI_DEFAULT_CONFIG_FILE_NAME, \
from crawlab.constants.upload import CLI_DEFAULT_CONFIG_ROOT_DIR, CLI_DEFAULT_CONFIG_CLI_DIR, \
CLI_DEFAULT_CONFIG_FILE_NAME, \
CLI_DEFAULT_CONFIG_KEY_PASSWORD

home = os.curdir
File renamed without changes.
Empty file.
File renamed without changes.
4 changes: 2 additions & 2 deletions _ext/python/setup.py
@@ -8,7 +8,7 @@

setup(
name='crawlab-sdk',
version='0.6.0-2',
version='0.6.0-3',
packages=find_packages(),
url='https://github.com/crawlab-team/crawlab-sdk',
license='BSD-3-Clause',
@@ -26,7 +26,7 @@
install_requires=install_requires,
entry_points={
'console_scripts': [
'crawlab-cli=cli.main:main'
'crawlab-cli=crawlab.cli.main:main'
]
}
)
2 changes: 1 addition & 1 deletion _ext/python/test/cli_action/__init__.py
@@ -1,2 +1,2 @@
from .login_test import *
from .upload_test import *
from .upload_test import *