Skip to content

Commit 6e0e211

Browse files
[User-reported bug fixes & dependency updates]
Min python version now 3.8
1 parent 45f1587 commit 6e0e211

File tree

4 files changed

+85
-85
lines changed

4 files changed

+85
-85
lines changed

scrapeops_scrapy/__init__.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.5.3"
1+
__version__ = "0.5.4"

scrapeops_scrapy/core/error_logger.py

+74-73
Original file line number | Diff line number | Diff line change
@@ -134,79 +134,63 @@ def emit(self, record):
134134
try:
135135

136136
if(record.levelname == "ERROR" or record.levelname == "WARNING" or record.levelname == "CRITICAL"):
137-
138-
errorMessage = record.message
139-
fileAndLine = record.pathname + ', line: ' + str(record.lineno)
140-
dateTime = record.asctime
141-
type = record.levelname
142-
engine = record.name
143-
144-
145-
#covering warnings/probableCause/traceback missing
146-
traceback = 'No traceback available'
147-
probableCause = ''
148-
149-
if record.exc_text is not None:
150-
traceback = record.exc_text
151-
splitTraceback = traceback.split('\n')
152-
probableCause = splitTraceback[len(splitTraceback) - 1]
153-
154-
155-
#covering retrys
156-
if("Gave up retrying <" in record.message):
157-
158-
for retryError in self.retryErrors:
159-
if(retryError in record.message):
160-
method = record.message.split('<')[1].split(' ')[0]
161-
errorMessage = "Error: Gave up retrying " + method + " request - " + retryError
162-
fileAndLine = ''
163-
probableCause = retryError
164-
break
165-
166-
# Deprecation Warnings
167-
if "ScrapyDeprecationWarning:" in record.message and record.message[0] == "/":
168-
splitString = record.message.split("ScrapyDeprecationWarning:")
169-
errorMessage = "ScrapyDeprecationWarning: " + splitString[1]
170-
probableCause = splitString[0]
171-
172-
173-
# "Some Other Error Occurred"
174-
if "Some other error occurred: " in record.message:
175-
splitError = record.message.split(' /')
176-
cleanError = splitError[0].split(">: ")[1]
177-
errorMessage = "Some other error occurred: " + cleanError
178-
probableCause = cleanError
179-
traceback = record.message
180-
181-
182-
# Convert Urls To Domains in Error Messages
183-
urls = re.findall(r'(https?://[^\s]+)', errorMessage)
184-
for url in urls:
185-
domain = DomainNormalizer.get_domain(url)
186-
errorMessage = errorMessage.replace(url, domain)
187-
188-
189-
if errorMessage in self.log_dict:
190-
self.log_dict[errorMessage]['count'] = self.log_dict[errorMessage]['count'] + 1
191-
else:
192-
self.log_dict[errorMessage] = {
193-
'type': type,
194-
'engine': engine,
195-
'name': errorMessage,
196-
'count': 1,
197-
'traceback': traceback,
198-
'message' : probableCause,
199-
'filepath': fileAndLine,
200-
'dateTime': dateTime
201-
}
202-
203-
if(SOPSRequest.HIGH_FREQ_ACC == True):
204-
205-
if(errorMessage in self.log_dict_cumulative):
206-
self.log_dict_cumulative[errorMessage]['count'] = self.log_dict_cumulative[errorMessage]['count'] + 1
137+
138+
if hasattr(record, 'message'):
139+
errorMessage = record.message
140+
fileAndLine = record.pathname + ', line: ' + str(record.lineno)
141+
dateTime = record.asctime
142+
type = record.levelname
143+
engine = record.name
144+
145+
146+
#covering warnings/probableCause/traceback missing
147+
traceback = 'No traceback available'
148+
probableCause = ''
149+
150+
if record.exc_text is not None:
151+
traceback = record.exc_text
152+
splitTraceback = traceback.split('\n')
153+
probableCause = splitTraceback[len(splitTraceback) - 1]
154+
155+
156+
#covering retrys
157+
if("Gave up retrying <" in record.message):
158+
159+
for retryError in self.retryErrors:
160+
if(retryError in record.message):
161+
method = record.message.split('<')[1].split(' ')[0]
162+
errorMessage = "Error: Gave up retrying " + method + " request - " + retryError
163+
fileAndLine = ''
164+
probableCause = retryError
165+
break
166+
167+
# Deprecation Warnings
168+
if "ScrapyDeprecationWarning:" in record.message and record.message[0] == "/":
169+
splitString = record.message.split("ScrapyDeprecationWarning:")
170+
errorMessage = "ScrapyDeprecationWarning: " + splitString[1]
171+
probableCause = splitString[0]
172+
173+
174+
# "Some Other Error Occurred"
175+
if "Some other error occurred: " in record.message:
176+
splitError = record.message.split(' /')
177+
cleanError = splitError[0].split(">: ")[1]
178+
errorMessage = "Some other error occurred: " + cleanError
179+
probableCause = cleanError
180+
traceback = record.message
181+
182+
183+
# Convert Urls To Domains in Error Messages
184+
urls = re.findall(r'(https?://[^\s]+)', errorMessage)
185+
for url in urls:
186+
domain = DomainNormalizer.get_domain(url)
187+
errorMessage = errorMessage.replace(url, domain)
188+
189+
190+
if errorMessage in self.log_dict:
191+
self.log_dict[errorMessage]['count'] = self.log_dict[errorMessage]['count'] + 1
207192
else:
208-
209-
self.log_dict_cumulative[errorMessage] = {
193+
self.log_dict[errorMessage] = {
210194
'type': type,
211195
'engine': engine,
212196
'name': errorMessage,
@@ -215,7 +199,24 @@ def emit(self, record):
215199
'message' : probableCause,
216200
'filepath': fileAndLine,
217201
'dateTime': dateTime
218-
}
202+
}
203+
204+
if(SOPSRequest.HIGH_FREQ_ACC == True):
205+
206+
if(errorMessage in self.log_dict_cumulative):
207+
self.log_dict_cumulative[errorMessage]['count'] = self.log_dict_cumulative[errorMessage]['count'] + 1
208+
else:
209+
210+
self.log_dict_cumulative[errorMessage] = {
211+
'type': type,
212+
'engine': engine,
213+
'name': errorMessage,
214+
'count': 1,
215+
'traceback': traceback,
216+
'message' : probableCause,
217+
'filepath': fileAndLine,
218+
'dateTime': dateTime
219+
}
219220

220221
except Exception as e:
221222
logging.info('Error: Error in error logger')

scrapeops_scrapy/validators/response_validator.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -110,9 +110,9 @@ def string_check(text, text_check, comparison, text_slice=None):
110110
@staticmethod
111111
def string_slice(text, text_slice):
112112
if text_slice.get('active'):
113-
if text_slice.get('slice_type') == 'first':
113+
if (text_slice.get('slice_type') == 'first') and (len(text) > 0):
114114
return text[:text_slice.get('slice_upper_threshold', len(text))]
115-
if text_slice.get('slice_type') == 'last':
115+
if (text_slice.get('slice_type') == 'last') and (len(text) > 0):
116116
return text[-text_slice.get('slice_lower_threshold', 0)]
117117
if text_slice.get('slice_type') == 'range':
118118
return text[text_slice.get('slice_lower_threshold', 0):text_slice.get('slice_upper_threshold', len(text))]

setup.py

+8-9
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from setuptools import setup, find_packages
22

33

4-
VERSION = '0.5.3'
4+
VERSION = '0.5.4'
55
DESCRIPTION = 'Scrapeops Scrapy SDK, is a monitoring tool for your Scrapy spiders.'
66

77
setup(name='scrapeops_scrapy',
@@ -14,23 +14,22 @@
1414
url="https://github.com/ScrapeOps/scrapeops-scrapy-sdk",
1515
packages=find_packages(),
1616
install_requires=[
17-
"tld>=0.12.4",
18-
"requests>=2.24.0",
19-
"json5>=0.9.5",
20-
"urllib3>=1.25.10",
21-
"itemadapter>=0.4.0",
17+
"tld>=0.13",
18+
"requests>=2.31.0",
19+
"json5>=0.9.13",
20+
"urllib3>=2.1",
21+
"itemadapter>=0.8.0",
2222
],
2323
classifiers=[
2424
"Programming Language :: Python",
2525
"Programming Language :: Python :: 3",
26-
"Programming Language :: Python :: 3.6",
27-
"Programming Language :: Python :: 3.7",
2826
"Programming Language :: Python :: 3.8",
2927
"Programming Language :: Python :: 3.9",
3028
"Programming Language :: Python :: 3.10",
29+
"Programming Language :: Python :: 3.11",
3130
"License :: OSI Approved :: BSD License",
3231
"Operating System :: OS Independent",
3332
"Intended Audience :: Developers",
3433
],
35-
python_requires=">=3.6",
34+
python_requires=">=3.8",
3635
)

0 commit comments

Comments (0)