This is from a modeling gig I had over the summer.
I am a senior majoring in Data Science and minoring in Business Analytics with an unofficial minor in Hispanic Studies. I am involved in our school’s chapter of Chi Omega, and am the Personal Development and Wellness Chair for the 2021 calendar year.
I have experience working as an analyst for a small management consulting firm in the Washington DC Metro area. I am currently completing the Tableau Desktop Specialist certification.
I am passionate and enjoy talking about fashion, feminism, and sustainability. I enjoy reading, working out, and being creative.
Below is a Python script that finds the duplicate files in a given folder.
import os
import sys
import hashlib
def findDuplicate(parentFolder):
    """Walk parentFolder recursively and group file paths by content hash.

    Returns a dict mapping each file's hash (from hashfile) to the list of
    paths sharing that hash; lists with more than one entry are duplicates.
    """
    hash_to_paths = {}
    for dir_name, _subdirs, file_names in os.walk(parentFolder):
        print('Scanning %s...' % dir_name)
        for name in file_names:
            full_path = os.path.join(dir_name, name)
            digest = hashfile(full_path)
            if digest in hash_to_paths:
                hash_to_paths[digest].append(full_path)
            else:
                hash_to_paths[digest] = [full_path]
    return hash_to_paths
def joinDictionaries(dict1, dict2):
    """Merge dict2 into dict1 in place, concatenating list values.

    Keys present in both dicts get dict1's list followed by dict2's;
    keys only in dict2 are added as-is. Returns None (mutates dict1).
    """
    for key, value in dict2.items():
        if key in dict1:
            dict1[key] = dict1[key] + value
        else:
            dict1[key] = value
def hashfile(path, blocksize = 65536):
    """Return the MD5 hex digest of the file at *path*.

    Reads the file in blocksize-byte chunks so large files are never
    loaded into memory at once.

    NOTE: MD5 is adequate for duplicate detection but is not
    collision-resistant; do not use it for security purposes.
    """
    hasher = hashlib.md5()
    # BUG FIX: the original opened/closed the file manually, leaking the
    # handle if read() raised; 'with' guarantees the file is closed.
    with open(path, 'rb') as afile:
        buf = afile.read(blocksize)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(blocksize)
    return hasher.hexdigest()
if __name__ == '__main__':
    # Usage: python duplicateFinder.py folder [folder2 folder3 ...]
    if len(sys.argv) > 1:
        duplicates = {}
        folders = sys.argv[1:]
        for i in folders:
            if os.path.exists(i):
                # Merge this folder's hash->paths map into the running total.
                joinDictionaries(duplicates, findDuplicate(i))
            else:
                # BUG FIX: the original printed the literal string
                # '%s is not a valid path' without interpolating the path.
                print('%s is not a valid path' % i)
                sys.exit()
        print(duplicates)
    else:
        print('Terminal Input: python duplicateFinder.py folder or python duplicateFinder.py folder1 folder2 folder3')