|
2 | 2 |
|
3 | 3 | import random
|
4 | 4 |
|
| 5 | +_agents = [ |
| 6 | + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36', |
| 7 | + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11', |
| 8 | + 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16', |
| 9 | + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', |
| 10 | + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36', |
| 11 | + 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko', |
| 12 | + 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)' |
| 13 | + 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0', |
| 14 | + 'Mozilla/5.0 (Linux; U; Android 2.2) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1', |
| 15 | + 'Mozilla/5.0 (Windows NT 5.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1', |
| 16 | + 'Mozilla/5.0 (Linux; Android 6.0.1; SM-G532G Build/MMB29T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.83 Mobile Safari/537.36', |
| 17 | + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0.3 Safari/604.5.6', |
| 18 | + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2', |
| 19 | + 'MAC:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36', |
| 20 | + 'Windows:Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50', |
| 21 | + 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5', |
| 22 | + 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5', |
| 23 | + 'Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5', |
| 24 | +] |
| 25 | + |
5 | 26 |
|
6 | 27 | class RandomUserAgentMiddleware(object):
|
7 | 28 | """
|
8 | 29 | 随机User Agent 中间件
|
9 | 30 | """
|
| 31 | + |
10 | 32 | @classmethod
|
11 | 33 | def from_crawler(cls, crawler):
|
12 |
| - return cls(user_agents=crawler.settings.getlist('USER_AGENTS')) |
| 34 | + return cls(user_agents=crawler.settings.getlist('USER_AGENTS', None)) |
13 | 35 |
|
14 |
| - def __init__(self, user_agents=[]): |
15 |
| - self.user_agents = user_agents |
| 36 | + def __init__(self, user_agents=None): |
| 37 | + self.user_agents = user_agents if user_agents is not None else _agents |
16 | 38 |
|
17 | 39 | def process_request(self, request, spider):
|
18 | 40 | if self.user_agents != None and len(self.user_agents) > 0:
|
|
0 commit comments