Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
.venv/
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
.idea
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,21 @@
# JD_Spider_python是一个python抓取京东商城的爬虫,没有使用框架,主要使用了requests,BeautifulSoup,threading,time,mysql数据库存储
`JD_Spider_python` 是一个用 Python 编写的京东商城爬虫,没有使用爬虫框架,主要使用了 requests、BeautifulSoup、threading 和 time,并将抓取结果存储到 MySQL 数据库中。

# Usage

## Requirements

```
pip install requests
pip install MySQL-python
pip install beautifulsoup4
```

## Database schema
```
CREATE DATABASE `jd_crawler` /*!40100 DEFAULT CHARACTER SET latin1 */;
CREATE TABLE `JD` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`img_url` varchar(1024) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=142 DEFAULT CHARSET=latin1;
```
6 changes: 3 additions & 3 deletions SQL.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ def __init__(self):
self.user='root'
self.password='root'
self.host='localhost'
self.database='python'
self.database='jd_crawler'
def get_connection(self):
return db.connect(user="root",passwd="root",host="localhost",db="python",charset="utf8")
return db.connect(user="root",passwd="root",host="127.0.0.1",port=3306,db="jd_crawler",charset="utf8")

def save_img(self,url):
conn=self.get_connection()
cursor=conn.cursor()
cursor.execute("insert into JD(id,img_url) values(NULL,%s)",url) #将img_url插入到数据库中
cursor.execute("insert into JD(img_url) values(%s)", [url]) #将img_url插入到数据库中
conn.commit()