I'm a Data Science major and a Computer Science minor. I am from Northern Virginia.
About me:
I am a Boston Celtics fan (picture below)
I was born in the Netherlands
I have a Yorkie
The goal of my script is to help summarize long and complex readings. It does this by finding the most common words in a text file, then Googling them and returning the first five links. These links are usually Wikipedia articles and other accessible sources that cover the key concepts in the reading.
import collections
import pandas as pd
import argparse

if __name__ == "__main__":
    # Check command-line arguments
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS,
                        help='The point of this script is to help summarize long readings. '
                             'It finds the most used words in a text and returns shorter and '
                             'simpler articles which go over these concepts.')
    args = parser.parse_args()

    file = open('NDFM.txt', encoding="utf8")  # input file
    a = file.read()

    # Stopwords in text file
    stopwords = set(line.strip() for line in open('stopwords.txt'))
    stopwords = stopwords.union(set(['mr', 'mrs', 'one', 'two', 'said']))

    # Count every word in the text that is not a stopword
    wordcount = {}
    for word in a.lower().split():
        word = word.replace(".", "")
        word = word.replace(",", "")
        word = word.replace(":", "")
        word = word.replace('"', "")
        word = word.replace("!", "")
        word = word.replace("?", "")
        if word not in stopwords:
            if word not in wordcount:
                wordcount[word] = 1
            else:
                wordcount[word] += 1

    # Build a search query out of the most common words
    n_print = int(input("How many key words to google?: "))
    word_counter = collections.Counter(wordcount)
    googleString = ""
    for word, count in word_counter.most_common(n_print):
        googleString = googleString + " " + word

    try:
        from googlesearch import search
    except ImportError:
        print("No module named 'google' found")

    # Print the first few result links for the combined query
    for j in search(googleString, tld="co.in", num=10, stop=5, pause=2):
        print(j)

    file.close()
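To use the script, the input reading (NDFM.txt here) and stopwords.txt need to sit in the same folder as the script; it prompts for how many key words to search and then prints the result links. The search function comes from the googlesearch module, which, judging by the ImportError message, is the google package on PyPI.

One small design note: collections.Counter can do the counting on its own, so the intermediate wordcount dictionary is not strictly needed. Below is a minimal sketch of the same keyword step, assuming a cleaned-up text string and a stopwords set like the one above (the sample text and the top_keywords name are just placeholders for illustration):

import collections
import string

def top_keywords(text, stopwords, n):
    # Same idea as the loop above: lowercase, strip punctuation, drop stopwords, count.
    words = (w.strip(string.punctuation) for w in text.lower().split())
    counts = collections.Counter(w for w in words if w and w not in stopwords)
    return [word for word, count in counts.most_common(n)]

# Hypothetical example; in the script the text would be the contents of NDFM.txt.
sample = "Data science uses data. Data science also uses statistics!"
print(top_keywords(sample, {"uses", "also"}, 2))  # ['data', 'science']

The joined keywords could then be passed to search() exactly as in the script.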