From 98913c4369faf46ad432375363216b0705f96ce6 Mon Sep 17 00:00:00 2001 From: Aman Ashray Date: Tue, 25 Jun 2019 12:37:20 +1000 Subject: [PATCH 1/3] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e355d4..87879e3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +Forked and corrected. working model + # Simple Word-based Language Model in PyTorch This model is directly analagous to this [Tesnsorflow's LM](https://www.tensorflow.org/tutorials/recurrent). In fact, the reader is directly taken from its older version @@ -21,4 +23,4 @@ On GTX 1080 I am getting around 7,400 wps. ## Requirements * Python 3 (I used Anaconda distribution) -* PyTorch (I used 0.1.12) \ No newline at end of file +* PyTorch (I used 0.1.12) From c52bbed5963f8ac8c853633f8532540423b43c45 Mon Sep 17 00:00:00 2001 From: Aman Ashray Date: Tue, 25 Jun 2019 12:37:39 +1000 Subject: [PATCH 2/3] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 87879e3..04a7806 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -Forked and corrected. working model - # Simple Word-based Language Model in PyTorch This model is directly analagous to this [Tesnsorflow's LM](https://www.tensorflow.org/tutorials/recurrent). In fact, the reader is directly taken from its older version From 9c44e01799a3b5c27127122b69e5ce0d89642823 Mon Sep 17 00:00:00 2001 From: Aman Ashray Date: Tue, 25 Jun 2019 12:40:30 +1000 Subject: [PATCH 3/3] Update ptb-lm.py --- ptb-lm.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ptb-lm.py b/ptb-lm.py index f43c754..eb49e89 100644 --- a/ptb-lm.py +++ b/ptb-lm.py @@ -7,9 +7,10 @@ from lm import repackage_hidden, LM_LSTM import reader import numpy as np +import copy parser = argparse.ArgumentParser(description='Simplest LSTM-based language model in PyTorch') -parser.add_argument('--data', type=str, default='data', +parser.add_argument('--data', type=str, default='data/penn-treebank/', help='location of the data corpus') parser.add_argument('--hidden_size', type=int, default=1500, help='size of word embeddings') @@ -39,18 +40,28 @@ def run_epoch(model, data, is_train=False, lr=1.0): epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps start_time = time.time() hidden = model.init_hidden() + savehidden = copy.deepcopy(hidden) costs = 0.0 iters = 0 for step, (x, y) in enumerate(reader.ptb_iterator(data, model.batch_size, model.num_steps)): + #print(x,y) + inputs = Variable(torch.from_numpy(x.astype(np.int64)).transpose(0, 1).contiguous()).cuda() model.zero_grad() - hidden = repackage_hidden(hidden) + #print(hidden) + #print(x,' x ', y, ' print x and y') + ''' + hidden = repackage_hidden(hidden) + modified - ashray17aman + ''' + hidden = copy.deepcopy(savehidden) outputs, hidden = model(inputs, hidden) targets = Variable(torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous()).cuda() tt = torch.squeeze(targets.view(-1, model.batch_size * model.num_steps)) loss = criterion(outputs.view(-1, model.vocab_size), tt) - costs += loss.data[0] * model.num_steps + #print(loss) + costs += torch.Tensor.item(loss.data) * model.num_steps iters += model.num_steps if is_train: @@ -90,4 +101,4 @@ def run_epoch(model, data, is_train=False, lr=1.0): print('Test Perplexity: {:8.2f}'.format(run_epoch(model, test_data))) with open(args.save, 'wb') as f: torch.save(model, f) - print("########## Done! ##########################") \ No newline at end of file + print("########## Done! ##########################")