Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions generate_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@

file_name = "DDPG_%s_%s" % (args.env_name, str(args.seed))
buffer_name = "Robust_%s_%s" % (args.env_name, str(args.seed))
print "---------------------------------------"
print "Settings: " + file_name
print "---------------------------------------"
print("---------------------------------------")
print("Settings: " + file_name)
print("---------------------------------------")

if not os.path.exists("./buffers"):
os.makedirs("./buffers")
Expand Down Expand Up @@ -53,7 +53,7 @@
if done:

if total_timesteps != 0:
print("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (total_timesteps, episode_num, episode_timesteps, episode_reward)
print("Total T: %d Episode Num: %d Episode T: %d Reward: %f" % (total_timesteps, episode_num, episode_timesteps, episode_reward))

# Reset environment
obs = env.reset()
Expand Down
14 changes: 7 additions & 7 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ def evaluate_policy(policy, eval_episodes=10):

avg_reward /= eval_episodes

print "---------------------------------------"
print "Evaluation over %d episodes: %f" % (eval_episodes, avg_reward)
print "---------------------------------------"
print("---------------------------------------")
print("Evaluation over %d episodes: %f" % (eval_episodes, avg_reward))
print("---------------------------------------")
return avg_reward


Expand All @@ -40,9 +40,9 @@ def evaluate_policy(policy, eval_episodes=10):

file_name = "BCQ_%s_%s" % (args.env_name, str(args.seed))
buffer_name = "%s_%s_%s" % (args.buffer_type, args.env_name, str(args.seed))
print "---------------------------------------"
print "Settings: " + file_name
print "---------------------------------------"
print("---------------------------------------")
print("Settings: " + file_name)
print("---------------------------------------")

if not os.path.exists("./results"):
os.makedirs("./results")
Expand Down Expand Up @@ -77,4 +77,4 @@ def evaluate_policy(policy, eval_episodes=10):
np.save("./results/" + file_name, evaluations)

training_iters += args.eval_freq
print "Training iterations: " + str(training_iters)
print("Training iterations: " + str(training_iters))
10 changes: 5 additions & 5 deletions train_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
args = parser.parse_args()

file_name = "DDPG_%s_%s" % (args.env_name, str(args.seed))
print "---------------------------------------"
print "Settings: " + file_name
print "---------------------------------------"
print("---------------------------------------")
print("Settings: " + file_name)
print("---------------------------------------")

if not os.path.exists("./pytorch_models"):
os.makedirs("./pytorch_models")
Expand Down Expand Up @@ -50,8 +50,8 @@

if done:

if total_timesteps != 0:
print("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (total_timesteps, episode_num, episode_timesteps, episode_reward)
if total_timesteps != 0:
print("Total T: %d Episode Num: %d Episode T: %d Reward: %f" % (total_timesteps, episode_num, episode_timesteps, episode_reward))
policy.train(replay_buffer, episode_timesteps)

# Save policy
Expand Down
2 changes: 1 addition & 1 deletion utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ def save(self, filename):
np.save("./buffers/"+filename+".npy", self.storage)

def load(self, filename):
self.storage = np.load("./buffers/"+filename+".npy")
self.storage = np.load("./buffers/"+filename+".npy",allow_pickle=True)