(Photo: from a modeling gig I had over the summer.)
I am a senior majoring in Data Science and minoring in Business Analytics, with an unofficial minor in Hispanic Studies. I am involved in our school's chapter of Chi Omega and serve as its Personal Development and Wellness Chair for the 2021 calendar year.
I have experience working as an analyst for a small management consulting firm in the Washington, DC metro area, and I am currently completing the Tableau Desktop Specialist certification.
I am passionate about fashion, feminism, and sustainability and enjoy talking about all three. Outside of that, I like reading, working out, and being creative.
Below is a Python script that finds duplicate files in one or more folders by comparing MD5 hashes of file contents.
import os
import sys
import hashlib
# Walk parentFolder and record every file path under its MD5 hash.
def findDuplicate(parentFolder):
    # duplicates maps {hash: [paths]}
    duplicates = {}
    for dirName, subdirs, filelist in os.walk(parentFolder):
        print('Scanning %s...' % dirName)
        for filename in filelist:
            path = os.path.join(dirName, filename)
            file_hash = hashfile(path)
            if file_hash in duplicates:
                duplicates[file_hash].append(path)
            else:
                duplicates[file_hash] = [path]
    return duplicates
# Merge dict2 into dict1 in place; path lists for shared hashes are concatenated.
def joinDictionaries(dict1, dict2):
    for key in dict2.keys():
        if key in dict1:
            dict1[key] = dict1[key] + dict2[key]
        else:
            dict1[key] = dict2[key]
# Compute the MD5 hash of a file, reading it in blocks so large files
# never have to fit in memory at once.
def hashfile(path, blocksize=65536):
    hasher = hashlib.md5()
    with open(path, 'rb') as afile:
        buf = afile.read(blocksize)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(blocksize)
    return hasher.hexdigest()
if __name__ == '__main__':
    if len(sys.argv) > 1:
        duplicates = {}
        folders = sys.argv[1:]
        for folder in folders:
            if os.path.exists(folder):
                joinDictionaries(duplicates, findDuplicate(folder))
            else:
                print('%s is not a valid path' % folder)
                sys.exit()
        # Report only the hashes shared by more than one file.
        results = {h: paths for h, paths in duplicates.items() if len(paths) > 1}
        print(results)
    else:
        print('Usage: python duplicateFinder.py folder [folder2 folder3 ...]')
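
If the script is saved as duplicateFinder.py, the same functions can also be called from another Python script instead of the terminal. Here is a minimal sketch; the folder names are hypothetical placeholders:

from duplicateFinder import findDuplicate, joinDictionaries

duplicates = {}
for folder in ['photos', 'photos_backup']:  # hypothetical folders
    joinDictionaries(duplicates, findDuplicate(folder))

# Print each set of identical files as its own block.
for file_hash, paths in duplicates.items():
    if len(paths) > 1:
        print('Duplicates (%s):' % file_hash)
        for path in paths:
            print('  ' + path)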
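One possible optimization, not part of the script above: files of different sizes can never be identical, so a cheap first pass can group paths by file size and only hash the groups with more than one member. A sketch under that assumption, reusing the hashfile() helper:

import os
from duplicateFinder import hashfile  # assumes the script above is saved as duplicateFinder.py

def findDuplicateBySize(parentFolder):
    # First pass: group paths by size without reading any file contents.
    sizes = {}
    for dirName, subdirs, filelist in os.walk(parentFolder):
        for filename in filelist:
            path = os.path.join(dirName, filename)
            sizes.setdefault(os.path.getsize(path), []).append(path)
    # Second pass: hash only files whose size collides with another file's.
    duplicates = {}
    for size, paths in sizes.items():
        if len(paths) > 1:
            for path in paths:
                duplicates.setdefault(hashfile(path), []).append(path)
    return {h: p for h, p in duplicates.items() if len(p) > 1}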