# Sample Script: WebScraper (.py)
# I wrote this sample script to help someone who wanted to know how to scrape
# a website. It is meant to serve as teaching material: an introduction to the
# basics of web scraping.
#reference_page
#("https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3")
#(https://www.guru99.com/reading-and-writing-files-in-python.html)
import requests #making http requests in python
from bs4 import BeautifulSoup #helps with parsing and grabbing info
# Address of the page we want to scrape.
web_address = "https://www.wm.edu/as/computerscience/undergraduate/major/index.php"

# Download the page. The timeout stops the script from hanging forever if the
# server never answers, and raise_for_status() turns an HTTP error (404, 500,
# ...) into an exception instead of letting us parse an error page.
page = requests.get(web_address, timeout=10)
page.raise_for_status()

# Creating a soup object: collect the text from the page and parse it.
soup = BeautifulSoup(page.text, "html.parser")

# Grab the element whose class is "user_content" (the main content div block).
cs_text = soup.find(class_="user_content")
if cs_text is None:
    # find() returns None when nothing matches; stop with a clear message
    # rather than failing later with an obscure AttributeError.
    raise SystemExit(
        "No element with class 'user_content' was found at " + web_address
    )

# Grab every <p> tag inside the user_content block.
cs_text_info = cs_text.find_all("p")

# Teaching notes -- other ways to look at the paragraphs while experimenting:
#   print(text.prettify())  -> the parsed tag as a nicely indented unicode string
#   text.contents[0]        -> only the FIRST child of the tag; it raises
#                              IndexError on an empty <p> and is a Tag (not
#                              text) when the paragraph starts with e.g. <a>.
#   text.get_text()         -> all of the text inside the tag (used below).

# Create (or overwrite) the output file. The "with" block closes the file
# automatically, even if an error occurs while writing, and the explicit
# UTF-8 encoding keeps non-ASCII characters from failing on some platforms.
with open("scrapedtext.txt", "w", encoding="utf-8") as text_doc:
    for text in cs_text_info:
        information = text.get_text()
        text_doc.write(information + "\n")