About the Coder

Sample script

My sample script is something I used for a lab in my Machine Learning class last semester. It uses a slot machine simulator as an example of Upper Confidence Bound reinforcement learning. This type of machine learning can also be applied to other problems, such as identifying the best treatment in clinical testing, finding the best-performing option in online advertising, or game design.

#slot_machine.py
#
#Author: Abby Gaddi
#
#Date: 2020-02-10
#
#This script shows how Upper Confidence Bound reinforcement learning
#is used, with slot machines as an example, and prompts the user for the starting balance and number of trials.
#
#
#LIBRARIES
import random
import math
import argparse


#Slot machine odds function given from class
def playSlots(machine):
    """Return the payout of a single play on the given slot machine.

    The machines are grouped into payout distributions:
      * machine 1:       uniform between 0 and 1
      * machine 2:       uniform between 0.5 and 1.5
      * machines 3-10:   triangular on [0, 1], peak at 0.5
      * machines 11-14:  triangular on [0.5, 1.5], peak at 1
      * machine 15:      triangular on [0.8, 3.0], peak at 1
      * machines 16-20:  uniform between 0 and 1.1

    Args:
        machine: slot machine number, 1 through 20.

    Returns:
        The payout rounded to 2 decimal places, or None when the number does
        not fall in any of the ranges above (a message is printed only for
        numbers greater than 20).
    """
    #NOTE: random.triangular takes (low, high, mode) -- the peak goes LAST.
    #Passing (low, mode, high) puts the mode outside [low, high] and silently
    #produces a different, much narrower distribution.

    if machine == 1:
        return round(random.uniform(0, 1), 2)

    if machine == 2:
        return round(random.uniform(0.5, 1.5), 2)

    if 3 <= machine <= 10:
        return round(random.triangular(0, 1, 0.5), 2)

    if 11 <= machine <= 14:
        return round(random.triangular(0.5, 1.5, 1), 2)

    if machine == 15:
        return round(random.triangular(0.8, 3.0, 1), 2)

    if 16 <= machine <= 20:
        return round(random.uniform(0, 1.1), 2)

    if machine > 20:
        print("There are only 20 slot machines!  You've gone awry.")

    #anything that reaches this point is not a valid machine number
    return None


def calculate_UCB(balance,N):

    #VARIABLES
    #number of slot machines
    d = 20
    #used for the machine/computer to learn
    machine_record = []
    award_record = []
    number_of_selections = [0] * d
    sum_of_rewards = [0] * d

    #UPPER CONFIDENCE BOUND REINFORCEMENT LEARNING
    for n in range(0,N):

        #initialize the machine choice at one
        machine_choice = 1
        #define the max upper bound, based on the mean and maximum return value
        max_upper_bound = 0

        #calculating which machine has the best return value so it is constantly picked
        for i in range(1,d):
            if number_of_selections[i] > 0:
                #calculate the average money got back
                average_award = sum_of_rewards[i] / number_of_selections[i]
                #calculate the money the machine thinks it will get back
                delta_i = math.sqrt(3/2*math.log(n+1)/number_of_selections[i])
                upper_bound = average_award + delta_i
            else:
                #if the machine is being used for the first time, set the upperbound very high to force the computer to test it
                upper_bound = 1e200

            if upper_bound > max_upper_bound:
                max_upper_bound = upper_bound
                #choose the machine with the highest upper bound
                machine_choice = i

        #calculate earnings/balance
        machine_record.append(machine_choice)
        number_of_selections[machine_choice] = number_of_selections[machine_choice] + 1
        award = playSlots(machine=machine_choice)
        sum_of_rewards[machine_choice] = sum_of_rewards[machine_choice] + award
        award_record.append(award)
        balance = balance - 1 + award
        return('You won: $' + str(round(balance,2))+'\n Machine ' + str(sum_of_rewards.index(max(sum_of_rewards))) +' won you the most money' )


#MAIN
if __name__ == '__main__':
    #argparse is only used to provide -h/--help; the inputs are prompted for below.
    #Adjacent string literals are used instead of a backslash continuation so the
    #source indentation does not end up inside the description text.
    p = argparse.ArgumentParser(
        description="Play an unrealistic slot machine simulator by gambling an amount "
                    "you want to play with and see which machine has the best cash reward.")
    p.parse_args()

    try:
        #float so that amounts with cents (e.g. 10.50) are accepted
        balance = float(input('How much money would you like to play with? (no dollar sign) '))
        N = int(input('How many times would you like to play? '))
    except ValueError:
        #p.error prints the usage plus this message and exits with status 2
        p.error('please enter a number for the money and a whole number for the number of plays')
    print(calculate_UCB(balance, N))

About the Coder

Abby Gaddi

Edited: 2020-02-19

Sample script