From 1bbf5710916c0b6bb7ad74f341bc52bea2decdd5 Mon Sep 17 00:00:00 2001 From: Fei Zhan Date: Thu, 27 Jul 2017 20:24:56 +1000 Subject: [PATCH 1/3] feedforward: complete sanity check. subscript consistent with arxiv paper --- js/vector_math.js | 49 +++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/js/vector_math.js b/js/vector_math.js index 66402a8..a9625c6 100644 --- a/js/vector_math.js +++ b/js/vector_math.js @@ -24,26 +24,35 @@ function feedforward(inputVectors, outputVectors, inputNeurons, hiddenNeurons, o var hiddenSize = hiddenNeurons.length; var vocabSize = inputNeurons.length; - /* Sanity check */ + /* + Sanity check from left to right: + * inputNeurons + * inputVectors + * hiddenNeurons + * outputVectors + * outputNeurons + */ + assert(vocabSize == inputNeurons.length); assert(vocabSize == inputVectors.length); - assert(vocabSize == outputVectors.length); - assert(vocabSize == outputNeurons.length); inputVectors.forEach(function(v) {assert(hiddenSize == v.length)}); + assert(hiddenSize == hiddenNeurons.length); + assert(vocabSize == outputVectors.length); outputVectors.forEach(function(v) {assert(hiddenSize == v.length)}); + assert(vocabSize == outputNeurons.length); var hiddenValueTemp = []; - for (var j = 0; j < hiddenSize; j++) hiddenValueTemp.push(0); + for (var i = 0; i < hiddenSize; i++) hiddenValueTemp.push(0); var numInputExcited = 0; - inputNeurons.forEach(function(n,i) { + inputNeurons.forEach(function(n, k) { if (n['value'] < 1e-5) return; // should be either 0 or 1 numInputExcited += 1; - for (var j = 0; j < hiddenSize; j++) hiddenValueTemp[j] += inputVectors[i][j]['weight']; + for (var i = 0; i < hiddenSize; i++) hiddenValueTemp[i] += inputVectors[k][i]['weight']; }); - hiddenNeurons.forEach(function(n,j) { + hiddenNeurons.forEach(function(n, i) { if (numInputExcited > 0) { - n['value'] = hiddenValueTemp[j] / numInputExcited; // taking average (for CBOW situation) + n['value'] = 
hiddenValueTemp[i] / numInputExcited; // taking average (for CBOW situation) } else { n['value'] = 0; } @@ -51,22 +60,28 @@ function feedforward(inputVectors, outputVectors, inputNeurons, hiddenNeurons, o var outValueTemp = []; var sumExpNetInput = 0.0; // denominator of softmax - for (var i = 0; i < vocabSize; i++) { - tmpSum = 0.0; // net input of neuron i in output layer - for (var j = 0; j < hiddenSize; j++) { - tmpSum += outputVectors[i][j]['weight'] * hiddenNeurons[j]['value']; + for (var j = 0; j < vocabSize; j++) { + tmpSum = 0.0; // net input of neuron j in output layer + for (var i = 0; i < hiddenSize; i++) { + tmpSum += hiddenNeurons[i]['value'] * outputVectors[j][i]['weight']; } - outputNeurons[i]['net_input'] = tmpSum; + outputNeurons[j]['net_input'] = tmpSum; expNetInput = exponential(tmpSum); - if (expNetInput == Infinity) expNetInput = Number.MAX_VALUE; + if (expNetInput == Infinity) { + // take max number available in case of exponential blows up + expNetInput = Number.MAX_VALUE; + } sumExpNetInput += expNetInput; outValueTemp.push(expNetInput); } - if (sumExpNetInput == Infinity) sumExpNetInput = Number.MAX_VALUE; + if (sumExpNetInput == Infinity) { + // take max number available in case of exponential blows up + sumExpNetInput = Number.MAX_VALUE; + } - for (var i = 0; i < vocabSize; i++) { // softmax - outputNeurons[i]['value'] = outValueTemp[i] / sumExpNetInput; + for (var j = 0; j < vocabSize; j++) { // softmax + outputNeurons[j]['value'] = outValueTemp[j] / sumExpNetInput; } } From ddd27bf760f410e9a268bae3d226164cc7455009 Mon Sep 17 00:00:00 2001 From: Fei Zhan Date: Thu, 27 Jul 2017 20:48:30 +1000 Subject: [PATCH 2/3] backpropagate: complete sanity check, subscript consistent with arxiv paper --- js/vector_math.js | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/js/vector_math.js b/js/vector_math.js index a9625c6..e9cb4b4 100644 --- a/js/vector_math.js +++ b/js/vector_math.js 
@@ -94,36 +94,45 @@ function feedforward(inputVectors, outputVectors, inputNeurons, hiddenNeurons, o function backpropagate(inputVectors, outputVectors, inputNeurons, hiddenNeurons, outputNeurons, expectedOutput) { var hiddenSize = hiddenNeurons.length; var vocabSize = inputNeurons.length; - - /* Sanity check */ + /* + Sanity check from left to right: + * inputNeurons + * inputVectors + * hiddenNeurons + * outputVectors + * outputNeurons + * expectedOutput + */ + assert(vocabSize == inputNeurons.length); assert(vocabSize == inputVectors.length); + inputVectors.forEach(function(v) {assert(hiddenSize == v.length)}); + assert(hiddenSize == hiddenNeurons.length); assert(vocabSize == outputVectors.length); + outputVectors.forEach(function(v) {assert(hiddenSize == v.length)}); assert(vocabSize == outputNeurons.length); assert(vocabSize == expectedOutput.length); - inputVectors.forEach(function(v) {assert(hiddenSize == v.length)}); - outputVectors.forEach(function(v) {assert(hiddenSize == v.length)}); var errors = []; - outputNeurons.forEach(function(n, i) { - error_i = n['value'] - expectedOutput[i] - errors.push(error_i); - n['net_input_gradient'] = error_i; + outputNeurons.forEach(function(n, j) { + error_j = n['value'] - expectedOutput[j] + errors.push(error_j); + n['net_input_gradient'] = error_j; }); - hiddenNeurons.forEach(function(n, j) { + hiddenNeurons.forEach(function(n) { n['net_input_gradient'] = 0.0; }); - outputVectors.forEach(function(v, i) { // i: vocab index (opposite to my paper's notations) - v.forEach(function(e, j) { // j: hidden layer index - e['gradient'] = errors[i] * hiddenNeurons[j]['value']; - hiddenNeurons[j]['net_input_gradient'] += errors[i] * e['weight']; + outputVectors.forEach(function(v, j) { // j: vocab index + v.forEach(function(e, i) { // i: hidden layer index + e['gradient'] = errors[j] * hiddenNeurons[i]['value']; + hiddenNeurons[i]['net_input_gradient'] += errors[j] * e['weight']; }); }); var numInputExcited = 0; var 
isInputExcitedArray = []; - inputNeurons.forEach(function(n,i) { + inputNeurons.forEach(function(n) { if (n['value'] < 1e-5) { // should be either 0 or 1 isInputExcitedArray.push(false); } else { @@ -133,13 +142,13 @@ function backpropagate(inputVectors, outputVectors, inputNeurons, hiddenNeurons, }); assert(numInputExcited > 0, "With no input assigned, how can you backpropagate??!"); - for (var i = 0; i < vocabSize; i++) { - for (var j = 0; j < hiddenSize; j++) { - if (isInputExcitedArray[i]) { - inputVectors[i][j]['gradient'] = hiddenNeurons[j]['net_input_gradient'] / numInputExcited; + for (var k = 0; k < vocabSize; k++) { + for (var i = 0; i < hiddenSize; i++) { + if (isInputExcitedArray[k]) { + inputVectors[k][i]['gradient'] = hiddenNeurons[i]['net_input_gradient'] / numInputExcited; } else { // this is necessary -- it will reset the gradients of non-invovled input vectors. - inputVectors[i][j]['gradient'] = 0; + inputVectors[k][i]['gradient'] = 0; } } } From 26baca42a691994589e5749320eee6fdf7958f91 Mon Sep 17 00:00:00 2001 From: Fei Zhan Date: Fri, 28 Jul 2017 07:45:32 +1000 Subject: [PATCH 3/3] backpropagation function: consistent with paper and feed forward --- js/vector_math.js | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/js/vector_math.js b/js/vector_math.js index e9cb4b4..d0ab11a 100644 --- a/js/vector_math.js +++ b/js/vector_math.js @@ -114,9 +114,12 @@ function backpropagate(inputVectors, outputVectors, inputNeurons, hiddenNeurons, var errors = []; outputNeurons.forEach(function(n, j) { + // error is the difference between output y_j and expected value t + // as in the paper Eq(77) error_j = n['value'] - expectedOutput[j] errors.push(error_j); - n['net_input_gradient'] = error_j; + // net_input_gradient is the EI'_j as in the paper Eq(78) + n['net_input_gradient'] = error_j * n['value'] * (1.0 - n['value']); }); hiddenNeurons.forEach(function(n) { @@ -125,8 +128,11 @@ function backpropagate(inputVectors, 
outputVectors, inputNeurons, hiddenNeurons, outputVectors.forEach(function(v, j) { // j: vocab index v.forEach(function(e, i) { // i: hidden layer index - e['gradient'] = errors[j] * hiddenNeurons[i]['value']; - hiddenNeurons[i]['net_input_gradient'] += errors[j] * e['weight']; + // this is the gradient defined as partial E partial w'_ij Eq(79) + e['gradient'] = outputNeurons[j]['net_input_gradient'] * hiddenNeurons[i]['value']; + // partial E partial h_i Eq (82). This is also Eq(83) since + // h_i = u_i as defined in the feed forward function + hiddenNeurons[i]['net_input_gradient'] += outputNeurons[j]['net_input_gradient'] * e['weight']; }); }); @@ -144,6 +150,7 @@ function backpropagate(inputVectors, outputVectors, inputNeurons, hiddenNeurons, for (var k = 0; k < vocabSize; k++) { for (var i = 0; i < hiddenSize; i++) { + // Eq(84). Note x_k is either 1 or 0 if (isInputExcitedArray[k]) { inputVectors[k][i]['gradient'] = hiddenNeurons[i]['net_input_gradient'] / numInputExcited; } else {