Skip to content

Commit 3d2dad1

Browse files
author
梁睿坤
committed
Upgrade to python3
1 parent 1be0258 commit 3d2dad1

File tree

5 files changed

+69
-65
lines changed

5 files changed

+69
-65
lines changed

requirements.txt

+41-59
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,49 @@
1-
adblockparser==0.7
2-
aliyun-python-sdk-core-v3==2.13.0
3-
aliyun-python-sdk-kms==2.5.0
4-
asn1crypto==0.24.0
5-
astroid==2.1.0
6-
attrs==18.2.0
7-
Automat==0.7.0
8-
autopep8==1.4.3
9-
certifi==2018.11.29
10-
cffi==1.12.1
11-
chardet==3.0.4
1+
aliyun-python-sdk-core==2.13.30
2+
aliyun-python-sdk-core-v3==2.13.11
3+
aliyun-python-sdk-kms==2.13.0
4+
attrs==20.3.0
5+
Automat==20.2.0
6+
certifi==2020.12.5
7+
cffi==1.14.4
8+
chardet==4.0.0
129
constantly==15.1.0
1310
crcmod==1.7
14-
cryptography==2.5
15-
cssselect==1.0.3
16-
dateparser==0.7.1
17-
funcparserlib==0.3.6
18-
hyperlink==18.0.0
19-
idna==2.8
11+
cryptography==3.2.1
12+
cssselect==1.1.0
13+
dateparser==1.0.0
14+
hyperlink==20.0.1
15+
idna==2.10
2016
incremental==17.5.0
21-
isort==4.3.4
22-
jmespath==0.9.3
23-
lazy-object-proxy==1.3.1
24-
lxml==4.3.1
25-
mccabe==0.6.1
26-
oss2==2.6.1
27-
parsel==1.5.1
28-
Pillow==5.4.1
29-
psutil==5.5.1
30-
psycopg2==2.7.7
31-
psycopg2-binary==2.7.7
32-
pyasn1==0.4.5
33-
pyasn1-modules==0.2.4
34-
pycodestyle==2.5.0
35-
pycparser==2.19
36-
pycryptodome==3.7.3
17+
itemadapter==0.2.0
18+
itemloaders==1.0.4
19+
jmespath==0.10.0
20+
lxml==4.6.2
21+
oss2==2.13.1
22+
parsel==1.6.0
23+
Protego==0.1.16
24+
pyasn1==0.4.8
25+
pyasn1-modules==0.2.8
26+
pycparser==2.20
27+
pycryptodome==3.9.9
3728
PyDispatcher==2.0.5
38-
PyHamcrest==1.9.0
39-
pylint==2.2.2
40-
pymongo==3.7.2
41-
pyOpenSSL==19.0.0
42-
pyquery==1.4.0
43-
python-dateutil==2.8.0
44-
pytz==2018.9
45-
qt5reactor==0.5
29+
PyHamcrest==2.0.2
30+
pymongo==3.11.2
31+
pyOpenSSL==20.0.1
32+
python-dateutil==2.8.1
33+
pytz==2020.4
4634
queuelib==1.5.0
47-
redis==3.2.1
48-
regex==2019.2.21
49-
requests==2.21.0
50-
Scrapy==1.6.0
35+
redis==3.5.3
36+
regex==2020.11.13
37+
requests==2.25.1
38+
Scrapy==2.4.1
5139
scrapy-splash==0.7.2
52-
scrapyd==1.2.0
53-
scrapyd-client==1.1.0
5440
selenium==3.141.0
5541
service-identity==18.1.0
56-
six==1.12.0
57-
splash==3.3.1
58-
SQLAlchemy==1.2.18
59-
stem==1.7.1
60-
Twisted==18.9.0
61-
typed-ast==1.3.1
62-
tzlocal==1.5.1
63-
urllib3==1.24.1
64-
w3lib==1.20.0
65-
wrapt==1.11.1
66-
xvfbwrapper==0.2.9
67-
zope.interface==4.6.0
42+
six==1.15.0
43+
SQLAlchemy==1.3.20
44+
stem==1.8.0
45+
Twisted==20.3.0
46+
tzlocal==2.1
47+
urllib3==1.26.2
48+
w3lib==1.22.0
49+
zope.interface==5.2.0

scrapy_plus/middlewares/ua.py

+25-3
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,39 @@
22

33
import random
44

5+
_agents = [
6+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
7+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
8+
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
9+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
10+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
11+
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
12+
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
13+
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
14+
'Mozilla/5.0 (Linux; U; Android 2.2) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
15+
'Mozilla/5.0 (Windows NT 5.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1',
16+
'Mozilla/5.0 (Linux; Android 6.0.1; SM-G532G Build/MMB29T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.83 Mobile Safari/537.36',
17+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0.3 Safari/604.5.6',
18+
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
19+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
20+
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
21+
'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',
22+
'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',
23+
'Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',
24+
]
25+
526

627
class RandomUserAgentMiddleware(object):
728
"""
829
随机User Agent 中间件
930
"""
31+
1032
@classmethod
1133
def from_crawler(cls, crawler):
12-
return cls(user_agents=crawler.settings.getlist('USER_AGENTS'))
34+
return cls(user_agents=crawler.settings.getlist('USER_AGENTS', None))
1335

14-
def __init__(self, user_agents=[]):
15-
self.user_agents = user_agents
36+
def __init__(self, user_agents=None):
37+
self.user_agents = user_agents if user_agents is not None else _agents
1638

1739
def process_request(self, request, spider):
1840
if self.user_agents != None and len(self.user_agents) > 0:

scrapy_plus/processors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import re
33
import six
44

5-
from six.moves.urllib.parse import urljoin, urlparse, urlunparse
5+
from urllib.parse import urljoin, urlparse, urlunparse
66

77
from copy import deepcopy
88
from itertools import chain

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
setup(
1212
name='scrapy_plus',
13-
version='1.0.4',
13+
version='1.0.5',
1414
packages=find_packages(exclude=["tests"]),
1515
install_requires=REQS,
1616
url='http://www.github.com/dotnetage/scrapy_plus',

tests/test_processors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# coding:utf-8
22
import unittest
3-
from douban.processors import Number, Text, Date, Price
3+
from scrapy_plus.processors import Number, Text, Date, Price
44

55

66
class ProcessorTestCase(unittest.TestCase):

0 commit comments

Comments
 (0)