Hi everyone!

For this lecture, you will need to define several helper functions. As a hint, they include the following:


def is_terminal_state

def get_starting_location

def get_next_action

def get_next_location

def get_shortest_path


It's a great idea to try to solve this yourself first and then compare your solution with the code below; doing so helps reinforce the concepts from the course and builds practical experience. If you would rather go straight to the code used to solve this part, please see the snippet below:




Define Helper Functions: 

def is_terminal_state(current_row_index, current_column_index):
    """
    Report whether the given grid cell is a terminal state.

    A cell is treated as non-terminal only when its entry in ``rewards``
    equals -1.; any other reward value makes the cell terminal.
    """
    cell_reward = rewards[current_row_index, current_column_index]
    return not (cell_reward == -1.)

def get_starting_location():
    """
    Pick a random non-terminal cell to start from.

    A row and a column are drawn with ``np.random.randint`` and redrawn
    together until ``is_terminal_state`` reports the pair as non-terminal.

    Returns:
        Tuple ``(row_index, column_index)`` of the chosen cell.
    """
    while True:
        # Draw the row first, then the column, on every attempt.
        row = np.random.randint(environment_rows)
        column = np.random.randint(environment_columns)
        if not is_terminal_state(row, column):
            return row, column

def get_next_action(current_row_index, current_column_index, epsilon):
    """
    Choose the index of the next action for the given cell.

    Note that ``epsilon`` is used here as the probability of taking the
    greedy action: when a draw from ``np.random.random()`` is below
    ``epsilon`` the action with the highest Q-value for the cell is returned;
    otherwise one of the 4 action indices is picked at random.
    """
    exploit = np.random.random() < epsilon
    if not exploit:
        return np.random.randint(4)
    return np.argmax(q_values[current_row_index, current_column_index])

def get_next_location(current_row_index, current_column_index, action_index):
    """
    Return the cell reached by applying an action to the current cell.

    The action name is looked up in ``actions``. A move that would leave the
    grid (bounded by ``environment_rows`` and ``environment_columns``), or an
    action name other than 'up', 'right', 'down' or 'left', leaves the
    position unchanged.

    Returns:
        Tuple ``(row_index, column_index)`` of the resulting cell.
    """
    row, column = current_row_index, current_column_index
    move = actions[action_index]

    if move == 'up':
        if row > 0:
            row -= 1
    elif move == 'right':
        if column < environment_columns - 1:
            column += 1
    elif move == 'down':
        if row < environment_rows - 1:
            row += 1
    elif move == 'left':
        if column > 0:
            column -= 1

    return row, column

def get_shortest_path(start_row_index, start_column_index):
    """
    Follow the greedy policy from a start cell until a terminal state.

    Each step asks ``get_next_action`` for an action with ``epsilon=1.``
    (the draw from ``np.random.random()`` is always below 1, so the
    highest-valued action is always taken) and moves with
    ``get_next_location``.

    Returns:
        List of ``[row, column]`` cells visited, beginning with the start
        cell and ending with the terminal cell that was reached. An empty
        list is returned when the start cell is itself terminal.

    Raises:
        RuntimeError: If the walk comes back to a cell it has already
            visited. The greedy choice is deterministic for fixed
            ``q_values``, so a repeated cell means the walk can never reach
            a terminal state (for example when ``q_values`` has not been
            trained yet). Without this check the loop would never end.
    """
    if is_terminal_state(start_row_index, start_column_index):
        return []

    current_row_index, current_column_index = start_row_index, start_column_index
    shortest_path = [[current_row_index, current_column_index]]
    # Cells already on the path; used only to detect a cycle.
    visited = {(current_row_index, current_column_index)}

    while not is_terminal_state(current_row_index, current_column_index):
        action_index = get_next_action(current_row_index, current_column_index, 1.)
        current_row_index, current_column_index = get_next_location(
            current_row_index, current_column_index, action_index)

        location = (current_row_index, current_column_index)
        if location in visited:
            # Same cell + same greedy action => the walk would repeat forever.
            raise RuntimeError(
                "No path to a terminal state: the greedy policy revisits "
                f"location {list(location)}"
            )
        visited.add(location)
        shortest_path.append([current_row_index, current_column_index])

    return shortest_path