# Exploration schedule: the probability of acting randomly equals
# explore_start at step 0 and decays exponentially toward explore_stop
# as step grows.
explore_start = 1.0     # exploration probability at step 0
explore_stop = 0.01     # value the exploration probability decays toward
decay_rate = 0.0001     # rate constant of the exponential decay

# Exploration probability for the current step.
explore_span = explore_start - explore_stop
explore_p = explore_stop + explore_span * np.exp(-decay_rate * step)

# Explore or exploit: one uniform draw from [0, 1) decides the branch.
take_random_action = explore_p > np.random.rand()
if take_random_action:
    # Explore: sample an action from the environment's action space.
    action = env.action_space.sample()
else:
    # Exploit: run the Q-network on the current state with a leading axis
    # of size 1 prepended, then take the index of the largest value in Qs.
    # NOTE(review): presumably Qs has shape (1, n_actions), so the flat
    # argmax is the action index -- confirm against mainQN's definition.
    batch_shape = (1,) + tuple(state.shape)
    feed = {mainQN.inputs_: state.reshape(batch_shape)}
    Qs = sess.run(mainQN.output, feed_dict=feed)
    action = np.argmax(Qs)

results matching ""

    No results matching ""