File tree 1 file changed +47
-0
lines changed 1 file changed +47
-0
lines changed Original file line number Diff line number Diff line change
1
+ '''
2
+ Exercise 17: Decode A Web Page
3
+
4
+ This is the first 4-chili exercise of this blog! We’ll see what people think,
5
+ and decide whether or not to continue with 4-chili exercises in the future.
6
+
7
+ Use the BeautifulSoup and requests Python packages to print out a list of all
8
+ the article titles on the New York Times homepage.
9
+
10
+ '''
11
+ # Solution
12
+ import requests
13
+ from bs4 import BeautifulSoup
14
+
15
def get_html_content_in_text(url, timeout=10):
    """
    Download the web page at url and return its content as text.

    Arguments:
    url -- a webpage url string.
    timeout -- seconds to wait for the server before giving up
               (default 10). Without a timeout, requests would wait
               indefinitely on an unresponsive host.

    Returns:
    The body of the HTTP response decoded to text.

    Raises:
    requests.exceptions.HTTPError -- the server answered with a 4xx/5xx
                                     status code.
    requests.exceptions.RequestException -- the request could not be
                                            completed (connection error,
                                            timeout, ...).

    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to
    # the caller as if it were the requested document.
    response.raise_for_status()
    return response.text
28
+
29
def main():
    """
    Print the article titles found on the New York Times homepage.

    The page is fetched with get_html_content_in_text, parsed with
    BeautifulSoup (html5lib parser), and every element carrying the
    "story-heading" class is treated as one headline. One title is
    printed per line; headings that contain no text are skipped.

    """
    content = get_html_content_in_text('http://www.nytimes.com/')
    soup = BeautifulSoup(content, "html5lib")
    for element in soup.find_all(class_="story-heading"):
        link = element.a
        if link is not None:
            raw_title = link.get_text()
        else:
            # Take the first non-blank piece of text in the heading.
            # Indexing contents[0] and calling .strip() on it breaks when
            # the heading is empty (IndexError) or when its first child is
            # a tag rather than a text node (TypeError).
            raw_title = next(element.stripped_strings, "")
        # Collapse newlines and runs of spaces into single spaces.
        title = " ".join(raw_title.split())
        if title:
            print(title)
37
+
38
# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger a network request.
if __name__ == "__main__":
    main()
40
+
41
+ # Test Part - 2018-3-29
42
+ # Trump Lawyer Broached Idea of Pardons for 2 Top Ex-Aides
43
+ # Trump Aide Spoke in ’16 to Person Tied to Russia Intelligence
44
+ # Justice Dept. Opens Internal Investigation on Surveillance of Trump Campaign Official
45
+ # ‘Kiss Up, Kick Down’: Recalling Bolton’s Confirmation in 2005
46
+ # Veterans Affairs Chief Is Out, as Trump’s Shake-Up Continues
47
+ # The Trump Administration’s Major Departures
You can’t perform that action at this time.
0 commit comments