From 1292d51d4ed0172c57ae724273d38ee2aa29ee1a Mon Sep 17 00:00:00 2001 From: Robin Ranjit Singh Chauhan Date: Mon, 11 Nov 2019 08:43:45 -0800 Subject: [PATCH] Minor changes for python 3.6 --- generate_buffer.py | 8 ++++---- main.py | 14 +++++++------- train_expert.py | 10 +++++----- utils.py | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/generate_buffer.py b/generate_buffer.py index 52bc6c4..c77e3d1 100644 --- a/generate_buffer.py +++ b/generate_buffer.py @@ -20,9 +20,9 @@ file_name = "DDPG_%s_%s" % (args.env_name, str(args.seed)) buffer_name = "Robust_%s_%s" % (args.env_name, str(args.seed)) - print "---------------------------------------" - print "Settings: " + file_name - print "---------------------------------------" + print("---------------------------------------") + print("Settings: " + file_name) + print("---------------------------------------") if not os.path.exists("./buffers"): os.makedirs("./buffers") @@ -53,7 +53,7 @@ if done: if total_timesteps != 0: - print("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (total_timesteps, episode_num, episode_timesteps, episode_reward) + print("Total T: %d Episode Num: %d Episode T: %d Reward: %f" % (total_timesteps, episode_num, episode_timesteps, episode_reward)) # Reset environment obs = env.reset() diff --git a/main.py b/main.py index 040b30c..3bdd2b5 100644 --- a/main.py +++ b/main.py @@ -22,9 +22,9 @@ def evaluate_policy(policy, eval_episodes=10): avg_reward /= eval_episodes - print "---------------------------------------" - print "Evaluation over %d episodes: %f" % (eval_episodes, avg_reward) - print "---------------------------------------" + print("---------------------------------------") + print("Evaluation over %d episodes: %f" % (eval_episodes, avg_reward)) + print("---------------------------------------") return avg_reward @@ -40,9 +40,9 @@ def evaluate_policy(policy, eval_episodes=10): file_name = "BCQ_%s_%s" % (args.env_name, str(args.seed)) buffer_name = "%s_%s_%s" % (args.buffer_type, args.env_name, str(args.seed)) - print "---------------------------------------" - print "Settings: " + file_name - print "---------------------------------------" + print("---------------------------------------") + print("Settings: " + file_name) + print("---------------------------------------") if not os.path.exists("./results"): os.makedirs("./results") @@ -77,4 +77,4 @@ def evaluate_policy(policy, eval_episodes=10): np.save("./results/" + file_name, evaluations) training_iters += args.eval_freq - print "Training iterations: " + str(training_iters) \ No newline at end of file + print("Training iterations: " + str(training_iters)) diff --git a/train_expert.py b/train_expert.py index a30030f..1a0370a 100644 --- a/train_expert.py +++ b/train_expert.py @@ -20,9 +20,9 @@ args = parser.parse_args() file_name = "DDPG_%s_%s" % (args.env_name, str(args.seed)) - print "---------------------------------------" - print "Settings: " + file_name - print "---------------------------------------" + print("---------------------------------------") + print("Settings: " + file_name) + print("---------------------------------------") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") @@ -50,8 +50,8 @@ if done: - if total_timesteps != 0: - print("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (total_timesteps, episode_num, episode_timesteps, episode_reward) + if total_timesteps != 0: + print("Total T: %d Episode Num: %d Episode T: %d Reward: %f" % (total_timesteps, episode_num, episode_timesteps, episode_reward)) policy.train(replay_buffer, episode_timesteps) # Save policy diff --git a/utils.py b/utils.py index 281480f..06a9657 100644 --- a/utils.py +++ b/utils.py @@ -34,4 +34,4 @@ def save(self, filename): np.save("./buffers/"+filename+".npy", self.storage) def load(self, filename): - self.storage = np.load("./buffers/"+filename+".npy") \ No newline at end of file + self.storage = np.load("./buffers/"+filename+".npy",allow_pickle=True)