To fit with the starfish theme from my username (CSStarfish), here’s a picture of a starfish relaxing on the beach :)
My sample script is a file I wrote for a final project in my Applied Machine Learning class. It accepts video streams from either a webcam or pre-recorded video files, then uses a convolutional neural network to classify the facial expression of the person in the video as one of the seven basic emotions: happy, sad, angry, fearful, disgusted, surprised, or neutral. I thought this could be useful for people who are visually impaired and may have difficulty reading facial expressions during a conversation, since the script gives a real-time indication of the other person's emotional state through both audio and on-screen output.
See also my script called Make Art (.py).
#The pre-recorded video files used towards the end of this script are saved in my public Machine Learning repository
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt
import os
from gtts import gTTS
import numpy as np
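#Load the training images from the labeled emotion sub-folders and build the training data generator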
training_path = 'Training Data (Final Project)'
train_happy_dir = os.path.join(training_path, 'Happy')        # directory with our training happy pictures
train_sad_dir = os.path.join(training_path, 'Sad')            # directory with our training sad pictures
train_anger_dir = os.path.join(training_path, 'Anger')        # directory with our training angry pictures
train_fear_dir = os.path.join(training_path, 'Fear')          # directory with our training fear pictures
train_surprise_dir = os.path.join(training_path, 'Surprise')  # directory with our training surprise pictures
train_disgust_dir = os.path.join(training_path, 'Disgust')    # directory with our training disgust pictures
train_neutral_dir = os.path.join(training_path, 'Neutral')    # directory with our training neutral pictures

training_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
training_gen = training_datagen.flow_from_directory(training_path, target_size=(48,48), batch_size=64, color_mode="grayscale", class_mode='categorical')
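#Load the testing images the same way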
testing_path = 'Testing Data (Final Project)'
test_happy_dir = os.path.join(testing_path, 'Happy')        # directory with our testing happy pictures
test_sad_dir = os.path.join(testing_path, 'Sad')            # directory with our testing sad pictures
test_anger_dir = os.path.join(testing_path, 'Anger')        # directory with our testing angry pictures
test_fear_dir = os.path.join(testing_path, 'Fear')          # directory with our testing fear pictures
test_surprise_dir = os.path.join(testing_path, 'Surprise')  # directory with our testing surprise pictures
test_disgust_dir = os.path.join(testing_path, 'Disgust')    # directory with our testing disgust pictures
test_neutral_dir = os.path.join(testing_path, 'Neutral')    # directory with our testing neutral pictures

testing_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
testing_gen = testing_datagen.flow_from_directory(testing_path, target_size=(48,48), batch_size=64, color_mode="grayscale", class_mode='categorical')
#Count how many images there are in each directory
num_happy_tr = len(os.listdir(train_happy_dir))
#print(num_happy_tr)
num_sad_tr = len(os.listdir(train_sad_dir))
#print(num_sad_tr)
num_anger_tr = len(os.listdir(train_anger_dir))
#print(num_anger_tr)
num_fear_tr = len(os.listdir(train_fear_dir))
#print(num_fear_tr)
num_surprise_tr = len(os.listdir(train_surprise_dir))
#print(num_surprise_tr)
num_disgust_tr = len(os.listdir(train_disgust_dir))
#print(num_disgust_tr)
num_neutral_tr = len(os.listdir(train_neutral_dir))
#print(num_neutral_tr)

for emotion in os.listdir(training_path + '/'):
    print(str(len(os.listdir(training_path + '/' + emotion))) + ' ' + emotion + ' Faces in the training dataset.')

num_happy_tst = len(os.listdir(test_happy_dir))
#print(num_happy_tst)
num_sad_tst = len(os.listdir(test_sad_dir))
#print(num_sad_tst)
num_anger_tst = len(os.listdir(test_anger_dir))
#print(num_anger_tst)
num_fear_tst = len(os.listdir(test_fear_dir))
#print(num_fear_tst)
num_surprise_tst = len(os.listdir(test_surprise_dir))
#print(num_surprise_tst)
num_disgust_tst = len(os.listdir(test_disgust_dir))
#print(num_disgust_tst)
num_neutral_tst = len(os.listdir(test_neutral_dir))
#print(num_neutral_tst)

for emotion in os.listdir(testing_path + '/'):
    print(str(len(os.listdir(testing_path + '/' + emotion))) + ' ' + emotion + ' Faces in the testing dataset.')

total_train = num_happy_tr + num_sad_tr + num_anger_tr + num_fear_tr + num_surprise_tr + num_disgust_tr + num_neutral_tr
print(str(total_train) + ' faces in the training dataset.')
total_test = num_happy_tst + num_sad_tst + num_anger_tst + num_fear_tst + num_surprise_tst + num_disgust_tst + num_neutral_tst
print(str(total_test) + ' faces in the testing dataset.')
#Create the model
model = Sequential([
    Conv2D(64, (3,3), activation='relu', padding='same', input_shape=(48,48,1)),
    MaxPooling2D((2,2)),
    Conv2D(128, (3,3), activation='relu', padding='same'),
    MaxPooling2D((2,2)),
    Conv2D(256, (3,3), activation='relu', padding='same'),
    MaxPooling2D((2,2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=0.001), metrics=['accuracy'])
history = model.fit(training_gen, steps_per_epoch=(total_train//64), epochs=30,
                    validation_data=testing_gen, validation_steps=(total_test//64))
model.summary()
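#Plot the training and validation accuracy and loss over the 30 epochs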
acc = history.history['accuracy']
train_acc = history.history['val_accuracy']

loss = history.history['loss']
train_loss = history.history['val_loss']

epochs_range = range(30)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, train_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, train_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
#Read video camera stream
cv2.ocl.setUseOpenCL(False)
emotions = ["Happy", "Sad", "Angry", "Scared", "Surprised", "Disgusted", "Neutral"]
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    face_scan = cv2.CascadeClassifier('C:/Python38/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml')
    gray_fr = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_scan.detectMultiScale(gray_fr, 1.3, 5)

    for (x, y, w, h) in faces:
        fc = gray_fr[y:y+h, x:x+w]

        roi = cv2.resize(fc, (48, 48))
        prediction = model.predict(roi[np.newaxis, :, :, np.newaxis])
        index = np.argmax(prediction)

        cv2.putText(frame, emotions[index], (x, y), cv2.FONT_HERSHEY_SIMPLEX, 4, (0, 128, 128), 6)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 128, 128), 2)

    cv2.imshow('Video', cv2.resize(frame, (1200,860), interpolation=cv2.INTER_CUBIC))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        current_index = index
        cap.release()
        cv2.destroyAllWindows()
        if current_index == 0:
            pred_text = 'This person looks happy!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 1:
            pred_text = 'This person looks sad.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 2:
            pred_text = 'This person looks angry.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 3:
            pred_text = 'This person looks scared!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 4:
            pred_text = 'This person looks disgusted.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 5:
            pred_text = 'This person looks surprised!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 6:
            pred_text = 'This person looks neutral.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
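#Run the same face-detection and emotion-classification loop on the pre-recorded 'happy.mp4' clip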
cap = cv2.VideoCapture('happy.mp4')

while True:
    ret, frame = cap.read()
    if not ret:
        break
    face_scan = cv2.CascadeClassifier('C:/Python38/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml')
    gray_fr = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_scan.detectMultiScale(gray_fr, 1.3, 5)

    for (x, y, w, h) in faces:
        fc = gray_fr[y:y+h, x:x+w]

        roi = cv2.resize(fc, (48, 48))
        prediction = model.predict(roi[np.newaxis, :, :, np.newaxis])
        index = np.argmax(prediction)

        cv2.putText(frame, emotions[index], (x, y-3), cv2.FONT_HERSHEY_SIMPLEX, 8, (0, 200, 255), 8)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 200, 255), 8)

    cv2.imshow('Video', cv2.resize(frame, (1200,860), interpolation=cv2.INTER_CUBIC))

    if cv2.waitKey(1) & 0xFF == ord('q'):
        current_index = index
        cap.release()
        cv2.destroyAllWindows()
        if current_index == 0:
            pred_text = 'This person looks happy!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 1:
            pred_text = 'This person looks sad.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 2:
            pred_text = 'This person looks angry.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 3:
            pred_text = 'This person looks scared!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 4:
            pred_text = 'This person looks disgusted.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 5:
            pred_text = 'This person looks surprised!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 6:
            pred_text = 'This person looks neutral.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
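#Same loop on the 'angry.mp4' clip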
cap = cv2.VideoCapture('angry.mp4')

while True:
    ret, frame = cap.read()
    if not ret:
        break
    face_scan = cv2.CascadeClassifier('C:/Python38/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml')
    gray_fr = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_scan.detectMultiScale(gray_fr, 1.3, 5)

    for (x, y, w, h) in faces:
        fc = gray_fr[y:y+h, x:x+w]

        roi = cv2.resize(fc, (48, 48))
        prediction = model.predict(roi[np.newaxis, :, :, np.newaxis])
        index = np.argmax(prediction)

        cv2.putText(frame, emotions[index], (x, y-3), cv2.FONT_HERSHEY_SIMPLEX, 8, (100, 0, 255), 8)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (100, 0, 255), 6)

    cv2.imshow('Video', cv2.resize(frame, (1200, 860), interpolation=cv2.INTER_CUBIC))

    if cv2.waitKey(1) & 0xFF == ord('q'):
        current_index = index
        cap.release()
        cv2.destroyAllWindows()
        if current_index == 0:
            pred_text = 'This person looks happy!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 1:
            pred_text = 'This person looks sad.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 2:
            pred_text = 'This person looks angry.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 3:
            pred_text = 'This person looks scared!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 4:
            pred_text = 'This person looks disgusted.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 5:
            pred_text = 'This person looks surprised!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 6:
            pred_text = 'This person looks neutral.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
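#Same loop on the 'sad.mp4' clip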
cap = cv2.VideoCapture('sad.mp4')

while True:
    ret, frame = cap.read()
    if not ret:
        break
    face_scan = cv2.CascadeClassifier('C:/Python38/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml')
    gray_fr = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_scan.detectMultiScale(gray_fr, 1.3, 5)

    for (x, y, w, h) in faces:
        fc = gray_fr[y:y+h, x:x+w]

        roi = cv2.resize(fc, (48, 48))
        prediction = model.predict(roi[np.newaxis, :, :, np.newaxis])
        index = np.argmax(prediction)

        cv2.putText(frame, emotions[index], (x, y-3), cv2.FONT_HERSHEY_SIMPLEX, 8, (200, 0, 0), 8)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (200, 0, 0), 6)

    cv2.imshow('Video', cv2.resize(frame, (1200, 860), interpolation=cv2.INTER_CUBIC))

    if cv2.waitKey(1) & 0xFF == ord('q'):
        current_index = index
        cap.release()
        cv2.destroyAllWindows()
        if current_index == 0:
            pred_text = 'This person looks happy!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 1:
            pred_text = 'This person looks sad.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 2:
            pred_text = 'This person looks angry.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 3:
            pred_text = 'This person looks scared!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 4:
            pred_text = 'This person looks disgusted.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 5:
            pred_text = 'This person looks surprised!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 6:
            pred_text = 'This person looks neutral.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
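#Same loop on the 'neutral.mp4' clip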
cap = cv2.VideoCapture('neutral.mp4')

while True:
    ret, frame = cap.read()
    if not ret:
        break
    face_scan = cv2.CascadeClassifier('C:/Python38/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml')
    gray_fr = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_scan.detectMultiScale(gray_fr, 1.3, 5)

    for (x, y, w, h) in faces:
        fc = gray_fr[y:y+h, x:x+w]

        roi = cv2.resize(fc, (48, 48))
        prediction = model.predict(roi[np.newaxis, :, :, np.newaxis])
        index = np.argmax(prediction)

        cv2.putText(frame, emotions[index], (x, y-3), cv2.FONT_HERSHEY_SIMPLEX, 8, (100, 100, 0), 8)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (100, 100, 0), 6)

    cv2.imshow('Video', cv2.resize(frame, (1200, 860), interpolation=cv2.INTER_CUBIC))

    if cv2.waitKey(1) & 0xFF == ord('q'):
        current_index = index
        cap.release()
        cv2.destroyAllWindows()
        if current_index == 0:
            pred_text = 'This person looks happy!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 1:
            pred_text = 'This person looks sad.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 2:
            pred_text = 'This person looks angry.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 3:
            pred_text = 'This person looks scared!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 4:
            pred_text = 'This person looks disgusted.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 5:
            pred_text = 'This person looks surprised!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 6:
            pred_text = 'This person looks neutral.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
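#Same loop on the 'SeveralEmotions.mpg' clip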
cap = cv2.VideoCapture('SeveralEmotions.mpg')

while True:
    ret, frame = cap.read()
    if not ret:
        break
    face_scan = cv2.CascadeClassifier('C:/Python38/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml')
    gray_fr = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_scan.detectMultiScale(gray_fr, 1.3, 5)

    for (x, y, w, h) in faces:
        fc = gray_fr[y:y+h, x:x+w]

        roi = cv2.resize(fc, (48, 48))
        prediction = model.predict(roi[np.newaxis, :, :, np.newaxis])
        index = np.argmax(prediction)

        cv2.putText(frame, emotions[index], (x, y-3), cv2.FONT_HERSHEY_SIMPLEX, 8, (100, 0, 225), 8)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (100, 0, 225), 6)

    cv2.imshow('Video', cv2.resize(frame, (1200, 860), interpolation=cv2.INTER_CUBIC))

    if cv2.waitKey(1) & 0xFF == ord('q'):
        current_index = index
        cap.release()
        cv2.destroyAllWindows()
        if current_index == 0:
            pred_text = 'This person looks happy!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 1:
            pred_text = 'This person looks sad.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 2:
            pred_text = 'This person looks angry.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 3:
            pred_text = 'This person looks scared!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 4:
            pred_text = 'This person looks disgusted.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 5:
            pred_text = 'This person looks surprised!'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break
        elif current_index == 6:
            pred_text = 'This person looks neutral.'
            language = 'en'
            speech = gTTS(text=pred_text, lang=language, slow=False)
            speech.save('emotion.mp3')
            os.system('emotion.mp3')
            break