Skip to content

Commit cf63315

Browse files
authored
Create Exercise-17-Decode-A-Web-Page.py
1 parent d2b4560 commit cf63315

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

Exercise-17-Decode-A-Web-Page.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
'''
2+
Exercise 17: Decode A Web Page
3+
4+
This is the first 4-chili exercise of this blog! We’ll see what people think,
5+
and decide whether or not to continue with 4-chili exercises in the future.
6+
7+
Use the BeautifulSoup and requests Python packages to print out a list of all
8+
the article titles on the New York Times homepage.
9+
10+
'''
11+
# Solution
12+
import requests
13+
from bs4 import BeautifulSoup
14+
15+
def get_html_content_in_text(url, timeout=10):
    """
    Fetch the web page at url and return its content as text.

    Arguments:
    url -- a webpage url string.
    timeout -- seconds to wait for the server before giving up (default 10).

    Returns:
    r.text -- the content of the webpage in text.

    Raises:
    requests.exceptions.RequestException -- if the request fails, times out,
    or the server answers with a 4xx/5xx status code.
    """
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    r = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors rather than handing an error page back to
    # the caller as if it were the requested content.
    r.raise_for_status()
    return r.text
28+
29+
def main():
    """
    Print the title of every story heading found on the New York Times
    homepage, one title per line.

    The page is downloaded with get_html_content_in_text, parsed with
    BeautifulSoup using the html5lib parser, and every element carrying the
    CSS class "story-heading" is treated as one article title.
    """
    content = get_html_content_in_text('http://www.nytimes.com/')
    soup = BeautifulSoup(content, "html5lib")
    for element in soup.find_all(class_="story-heading"):
        # Prefer the link text when the heading wraps an anchor; otherwise
        # fall back to the heading's own text.
        node = element.a if element.a else element
        # get_text() copes with any child layout, whereas contents[0].strip()
        # raises on an empty heading (IndexError) or when the first child is
        # a tag instead of a string (TypeError). split()/join collapses
        # newlines and runs of spaces into single spaces.
        title = " ".join(node.get_text().split())
        # A heading with no visible text is not an article title.
        if title:
            print(title)
37+
38+
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
39+
main()
40+
41+
# Sample output (run on 2018-03-29):
42+
# Trump Lawyer Broached Idea of Pardons for 2 Top Ex-Aides
43+
# Trump Aide Spoke in ’16 to Person Tied to Russia Intelligence
44+
# Justice Dept. Opens Internal Investigation on Surveillance of Trump Campaign Official
45+
# ‘Kiss Up, Kick Down’: Recalling Bolton’s Confirmation in 2005
46+
# Veterans Affairs Chief Is Out, as Trump’s Shake-Up Continues
47+
# The Trump Administration’s Major Departures

0 commit comments

Comments
 (0)