This example covers some basic functionality of urllib2 - opening a website, reading from it and finding its title. diff --git a/data/GSOC examples/Opening websites b/data/GSOC examples/Opening we new file mode 100644 index 0000000..97ee16f --- /dev/null +++ b/data/GSOC examples/Opening websites @@ -0,0 +1,64 @@ +# This example demonstrates how urllib2 can be used to open websites and read +# some data from them. + +import urllib2 + +# define a function which will open a bunch of links we give it in a list +def open_sites(links): + sites = [] + for url in urls: + print "Opening: " + url + # try to open that site + try: + site = urllib2.urlopen(url) + except: + # Does an error occur with any of the default urls? + # Practice: If so, could you fix it? + print "An error has occured, skipping " + url + print + raw_input("...press enter key to continue...") + continue + if site.geturl() != url: + print "Careful! Site " + url + " has redirected you to " + site.get + print "Site " + site.geturl() + " is now open." + print + sites.append(site) + raw_input("...press enter key to continue...") + print + return sites + +url1 = "<a href="http://www.google.com">http://www.google.com</a>" +url2 = "<a href="http://www.sugarlabs.org">http://www.sugarlabs.org</a>" +url3 = "<a href="http://www.wikipedia.org">www.wikipedia.org</a>" +urls = [url1, url2, url3] + +sites = open_sites(urls) + +print +print "Let's read those sites and find their titles." +print +raw_input("...press enter key to continue...") +print + +for site in sites: + site_content = site.read() + title_at = site_content.find("<title>") + 7 + print "The title of site at " + site.geturl() + " begins at its index " + s + title_ends = site_content.find("</title>", title_at) + title = site_content[title_at:title_ends] + # In Python, \ is the so-called "escape" character. Since some characters h + # special meanings, like " or ' opening and closing a string, we have to te + # the interpreter to ignore such meanings when we wish to put those precise + # characters in a string (or print them). In the following line, we wish to + # print the " character so we "escape" it - by putting \ in before it. + # Practice: What would we have to do to print an escape character \ ? + print "The title is: \"" + title + "\"" + print + # An index of -1 refers to the first element from the end. Thus, this + # comparison checks whether the current element is the last one. + # Practice: Why would we want that? + if site == sites[-1]: + raw_input("...press enter to finish..:") + else: + raw_input("...press enter key to continue...") + print