Skip to content
This repository was archived by the owner on May 25, 2022. It is now read-only.

Commit 6866c61

Browse files
committed Oct 27, 2020
store_emails.py: fix unbound variable (!)
Also, guard against None being returned from body extraction. Also, update requirements.txt to include lxml for bs4.
1 parent 5665e14 commit 6866c61

File tree

3 files changed

+17
-14
lines changed

3 files changed

+17
-14
lines changed
 

‎projects/Store_emails_in_csv/README.md

+1 -1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ The messages are written to a simple four-column CSV file.
88

99
## Dependencies
1010

11-
This depends on the BeautifulSoup library
11+
This depends on the BeautifulSoup library and `lxml`
1212
for extracting text from HTML messages.
1313

1414

requirements.txt

Original file line number | Diff line number | Diff line change
@@ -1 +1,2 @@
11
beautifulsoup4
2+
lxml

‎projects/Store_emails_in_csv/store_emails.py

+15 -13
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def get_text(email_body):
4141
return soup.get_text(separator="\n", strip=True)
4242

4343

44-
def write_to_csv(mail, writer):
44+
def write_to_csv(mail, writer, N, total_no_of_mails):
4545

4646
for i in range(total_no_of_mails, total_no_of_mails - N, -1):
4747
res, data = mail.fetch(str(i), "(RFC822)")
@@ -65,10 +65,9 @@ def write_to_csv(mail, writer):
6565
content_disposition = str(part.get("Content-Disposition"))
6666
try:
6767
# get the email email_body
68-
email_body = part.get_payload(decode=True).decode(
69-
"utf-8"
70-
)
71-
email_text = get_text(email_body)
68+
email_body = part.get_payload(decode=True)
69+
if email_body:
70+
email_text = get_text(email_body.decode('utf-8'))
7271
except Exception as exc:
7372
logger.warning('Caught exception: %r', exc)
7473
if (
@@ -85,13 +84,16 @@ def write_to_csv(mail, writer):
8584
# extract content type of email
8685
content_type = msg.get_content_type()
8786
# get the email email_body
88-
email_body = msg.get_payload(decode=True).decode("utf-8")
89-
email_text = get_text(email_body)
90-
91-
# Write data in the csv file
92-
row = [email_date, email_from, email_subject, email_text]
93-
writer.writerow(row)
94-
87+
email_body = msg.get_payload(decode=True)
88+
if email_body:
89+
email_text = get_text(email_body.decode('utf-8'))
90+
91+
if email_text is not None:
92+
# Write data in the csv file
93+
row = [email_date, email_from, email_subject, email_text]
94+
writer.writerow(row)
95+
else:
96+
logger.warning('%s:%i: No message extracted', "INBOX", i)
9597

9698
def main():
9799
mail, messages = connect_to_mailbox()
@@ -107,7 +109,7 @@ def main():
107109
writer = csv.writer(fw)
108110
writer.writerow(["Date", "From", "Subject", "Text mail"])
109111
try:
110-
write_to_csv(mail, writer)
112+
write_to_csv(mail, writer, N, total_no_of_mails)
111113
except Exception as exc:
112114
logger.warning('Caught exception: %r', exc)
113115

0 commit comments

Comments
 (0)