Use of edu.stanford.nlp.neural.SimpleTensor in project CoreNLP by stanfordnlp.
The class SentimentCostAndGradient, method backpropDerivativesAndError.
private void backpropDerivativesAndError(Tree tree,
                                         TwoDimensionalMap<String, String, SimpleMatrix> binaryTD,
                                         TwoDimensionalMap<String, String, SimpleMatrix> binaryCD,
                                         TwoDimensionalMap<String, String, SimpleTensor> binaryTensorTD,
                                         Map<String, SimpleMatrix> unaryCD,
                                         Map<String, SimpleMatrix> wordVectorD,
                                         SimpleMatrix deltaUp) {
  if (tree.isLeaf()) {
    return;
  }

  SimpleMatrix currentVector = RNNCoreAnnotations.getNodeVector(tree);
  String category = tree.label().value();
  category = model.basicCategory(category);

  // Build a vector that looks like 0,0,1,0,0 with an indicator for the correct class
  SimpleMatrix goldLabel = new SimpleMatrix(model.numClasses, 1);
  int goldClass = RNNCoreAnnotations.getGoldClass(tree);
  if (goldClass >= 0) {
    goldLabel.set(goldClass, 1.0);
  }

  double nodeWeight = model.op.trainOptions.getClassWeight(goldClass);

  SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(tree);

  // If this is an unlabeled class, set deltaClass to 0.  We could
  // make this more efficient by eliminating various of the below
  // calculations, but this would be the easiest way to handle the
  // unlabeled class.
  SimpleMatrix deltaClass = goldClass >= 0 ? predictions.minus(goldLabel).scale(nodeWeight) : new SimpleMatrix(predictions.numRows(), predictions.numCols());
  SimpleMatrix localCD = deltaClass.mult(NeuralUtils.concatenateWithBias(currentVector).transpose());

  double error = -(NeuralUtils.elementwiseApplyLog(predictions).elementMult(goldLabel).elementSum());
  error = error * nodeWeight;
  RNNCoreAnnotations.setPredictionError(tree, error);

  if (tree.isPreTerminal()) {
    // below us is a word vector
    unaryCD.put(category, unaryCD.get(category).plus(localCD));

    String word = tree.children()[0].label().value();
    word = model.getVocabWord(word);

    // SimpleMatrix currentVectorDerivative = NeuralUtils.elementwiseApplyTanhDerivative(currentVector);
    // SimpleMatrix deltaFromClass = model.getUnaryClassification(category).transpose().mult(deltaClass);
    // SimpleMatrix deltaFull = deltaFromClass.extractMatrix(0, model.op.numHid, 0, 1).plus(deltaUp);
    // SimpleMatrix wordDerivative = deltaFull.elementMult(currentVectorDerivative);
    // wordVectorD.put(word, wordVectorD.get(word).plus(wordDerivative));

    SimpleMatrix currentVectorDerivative = NeuralUtils.elementwiseApplyTanhDerivative(currentVector);
    SimpleMatrix deltaFromClass = model.getUnaryClassification(category).transpose().mult(deltaClass);
    deltaFromClass = deltaFromClass.extractMatrix(0, model.op.numHid, 0, 1).elementMult(currentVectorDerivative);
    SimpleMatrix deltaFull = deltaFromClass.plus(deltaUp);

    SimpleMatrix oldWordVectorD = wordVectorD.get(word);
    if (oldWordVectorD == null) {
      wordVectorD.put(word, deltaFull);
    } else {
      wordVectorD.put(word, oldWordVectorD.plus(deltaFull));
    }
  } else {
    // Otherwise, this must be a binary node
    String leftCategory = model.basicCategory(tree.children()[0].label().value());
    String rightCategory = model.basicCategory(tree.children()[1].label().value());

    if (model.op.combineClassification) {
      unaryCD.put("", unaryCD.get("").plus(localCD));
    } else {
      binaryCD.put(leftCategory, rightCategory, binaryCD.get(leftCategory, rightCategory).plus(localCD));
    }

    SimpleMatrix currentVectorDerivative = NeuralUtils.elementwiseApplyTanhDerivative(currentVector);
    SimpleMatrix deltaFromClass = model.getBinaryClassification(leftCategory, rightCategory).transpose().mult(deltaClass);
    deltaFromClass = deltaFromClass.extractMatrix(0, model.op.numHid, 0, 1).elementMult(currentVectorDerivative);
    SimpleMatrix deltaFull = deltaFromClass.plus(deltaUp);

    SimpleMatrix leftVector = RNNCoreAnnotations.getNodeVector(tree.children()[0]);
    SimpleMatrix rightVector = RNNCoreAnnotations.getNodeVector(tree.children()[1]);
    SimpleMatrix childrenVector = NeuralUtils.concatenateWithBias(leftVector, rightVector);
    SimpleMatrix W_df = deltaFull.mult(childrenVector.transpose());
    binaryTD.put(leftCategory, rightCategory, binaryTD.get(leftCategory, rightCategory).plus(W_df));

    SimpleMatrix deltaDown;
    if (model.op.useTensors) {
      SimpleTensor Wt_df = getTensorGradient(deltaFull, leftVector, rightVector);
      binaryTensorTD.put(leftCategory, rightCategory, binaryTensorTD.get(leftCategory, rightCategory).plus(Wt_df));
      deltaDown = computeTensorDeltaDown(deltaFull, leftVector, rightVector, model.getBinaryTransform(leftCategory, rightCategory), model.getBinaryTensor(leftCategory, rightCategory));
    } else {
      deltaDown = model.getBinaryTransform(leftCategory, rightCategory).transpose().mult(deltaFull);
    }

    SimpleMatrix leftDerivative = NeuralUtils.elementwiseApplyTanhDerivative(leftVector);
    SimpleMatrix rightDerivative = NeuralUtils.elementwiseApplyTanhDerivative(rightVector);
    SimpleMatrix leftDeltaDown = deltaDown.extractMatrix(0, deltaFull.numRows(), 0, 1);
    SimpleMatrix rightDeltaDown = deltaDown.extractMatrix(deltaFull.numRows(), deltaFull.numRows() * 2, 0, 1);
    backpropDerivativesAndError(tree.children()[0], binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, leftDerivative.elementMult(leftDeltaDown));
    backpropDerivativesAndError(tree.children()[1], binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, rightDerivative.elementMult(rightDeltaDown));
  }
}
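The heart of the backprop above is the standard softmax cross-entropy identity: with a one-hot gold label, the gradient of the loss with respect to the classifier input is simply predictions minus goldLabel, optionally scaled by a class weight. A minimal standalone sketch of that step, assuming only EJML on the classpath (class and variable names here are illustrative, not part of CoreNLP):

import org.ejml.simple.SimpleMatrix;

public class SoftmaxDeltaSketch {
  public static void main(String[] args) {
    // Predicted class distribution for a 5-class node (entries sum to 1).
    SimpleMatrix predictions = new SimpleMatrix(5, 1, true,
        new double[] {0.1, 0.2, 0.4, 0.2, 0.1});

    // One-hot gold label: the correct class is index 2.
    SimpleMatrix goldLabel = new SimpleMatrix(5, 1);
    goldLabel.set(2, 1.0);

    // Error signal at the softmax layer: d(loss)/d(input) = p - y.
    SimpleMatrix deltaClass = predictions.minus(goldLabel);

    // Cross-entropy error for this node: -log p[gold].
    double error = -Math.log(predictions.get(2));

    System.out.println("deltaClass = " + deltaClass);
    System.out.println("error = " + error);
  }
}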
Use of edu.stanford.nlp.neural.SimpleTensor in project CoreNLP by stanfordnlp.
The class SentimentCostAndGradient, method forwardPropagateTree.
/**
* This is the method to call for assigning labels and node vectors
* to the Tree. After calling this, each of the non-leaf nodes will
* have the node vector and the predictions of their classes
* assigned to that subtree's node. The annotations filled in are
* the RNNCoreAnnotations.NodeVector, Predictions, and
* PredictedClass. In general, PredictedClass will be the most
* useful annotation except when training.
*/
public void forwardPropagateTree(Tree tree) {
  SimpleMatrix nodeVector;     // initialized below or an exception is thrown
  SimpleMatrix classification; // initialized below or an exception is thrown

  if (tree.isLeaf()) {
    // The recursion should not have reached a leaf: the preterminals
    // calculate the classification for the word below them (except
    // possibly for degenerate trees of just one leaf)
    throw new ForwardPropagationException("We should not have reached leaves in forwardPropagate");
  } else if (tree.isPreTerminal()) {
    classification = model.getUnaryClassification(tree.label().value());
    String word = tree.children()[0].label().value();
    SimpleMatrix wordVector = model.getWordVector(word);
    nodeVector = NeuralUtils.elementwiseApplyTanh(wordVector);
  } else if (tree.children().length == 1) {
    throw new ForwardPropagationException("Non-preterminal nodes of size 1 should have already been collapsed");
  } else if (tree.children().length == 2) {
    forwardPropagateTree(tree.children()[0]);
    forwardPropagateTree(tree.children()[1]);

    String leftCategory = tree.children()[0].label().value();
    String rightCategory = tree.children()[1].label().value();
    SimpleMatrix W = model.getBinaryTransform(leftCategory, rightCategory);
    classification = model.getBinaryClassification(leftCategory, rightCategory);

    SimpleMatrix leftVector = RNNCoreAnnotations.getNodeVector(tree.children()[0]);
    SimpleMatrix rightVector = RNNCoreAnnotations.getNodeVector(tree.children()[1]);
    SimpleMatrix childrenVector = NeuralUtils.concatenateWithBias(leftVector, rightVector);
    if (model.op.useTensors) {
      SimpleTensor tensor = model.getBinaryTensor(leftCategory, rightCategory);
      SimpleMatrix tensorIn = NeuralUtils.concatenate(leftVector, rightVector);
      SimpleMatrix tensorOut = tensor.bilinearProducts(tensorIn);
      nodeVector = NeuralUtils.elementwiseApplyTanh(W.mult(childrenVector).plus(tensorOut));
    } else {
      nodeVector = NeuralUtils.elementwiseApplyTanh(W.mult(childrenVector));
    }
  } else {
    StringBuilder error = new StringBuilder();
    error.append("SentimentCostAndGradient: Tree not correctly binarized:\n ");
    error.append(tree);
    error.append("\nToo many top level constituents present: ");
    error.append("(" + tree.value());
    for (Tree child : tree.children()) {
      error.append(" (" + child.value() + " ...)");
    }
    error.append(")");
    throw new ForwardPropagationException(error.toString());
  }

  SimpleMatrix predictions = NeuralUtils.softmax(classification.mult(NeuralUtils.concatenateWithBias(nodeVector)));

  int index = getPredictedClass(predictions);
  if (!(tree.label() instanceof CoreLabel)) {
    log.info("SentimentCostAndGradient: warning: No CoreLabels in nodes: " + tree);
    throw new AssertionError("Expected CoreLabels in the nodes");
  }
  CoreLabel label = (CoreLabel) tree.label();
  label.set(RNNCoreAnnotations.Predictions.class, predictions);
  label.set(RNNCoreAnnotations.PredictedClass.class, index);
  label.set(RNNCoreAnnotations.NodeVector.class, nodeVector);
}
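In the binary case above, the tensor contributes one extra scalar per hidden dimension: slice k of the tensor produces x' T[k] x for the concatenated (bias-free) child vector x, and those scalars are added to W [a; b; 1] before the tanh. A minimal sketch of that bilinear product, assuming EJML and approximating the tensor as an array of square slices (illustrative names, not the SimpleTensor API):

import org.ejml.simple.SimpleMatrix;

public class BilinearProductSketch {
  // Computes one output per slice: out[k] = x' * slices[k] * x,
  // mirroring the role of SimpleTensor.bilinearProducts in the RNTN.
  static SimpleMatrix bilinearProducts(SimpleMatrix[] slices, SimpleMatrix x) {
    SimpleMatrix out = new SimpleMatrix(slices.length, 1);
    for (int k = 0; k < slices.length; ++k) {
      out.set(k, x.transpose().mult(slices[k]).mult(x).get(0));
    }
    return out;
  }

  public static void main(String[] args) {
    // Two slices of size 4x4 operating on a concatenated child vector [a; b].
    java.util.Random rand = new java.util.Random(42);
    SimpleMatrix[] slices = {
        SimpleMatrix.random_DDRM(4, 4, -0.1, 0.1, rand),
        SimpleMatrix.random_DDRM(4, 4, -0.1, 0.1, rand)
    };
    SimpleMatrix x = SimpleMatrix.random_DDRM(4, 1, -1, 1, rand);
    System.out.println(bilinearProducts(slices, x));
  }
}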
Use of edu.stanford.nlp.neural.SimpleTensor in project CoreNLP by stanfordnlp.
The class SentimentModel, method randomBinaryTensor.
SimpleTensor randomBinaryTensor() {
  double range = 1.0 / (4.0 * numHid);
  SimpleTensor tensor = SimpleTensor.random(numHid * 2, numHid * 2, numHid, -range, range, rand);
  return tensor.scale(op.trainOptions.scalingForInit);
}
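Note that the initialization range shrinks with the hidden size: with numHid = 25 (the slice count used by ConvertMatlabModel below), entries are drawn uniformly from ±1/(4 · 25) = ±0.01 before the scalingForInit multiplier. A standalone sketch of the same scheme, assuming EJML and modeling the (2·numHid × 2·numHid × numHid) tensor as an array of slices (illustrative, not the SimpleTensor implementation):

import java.util.Random;
import org.ejml.simple.SimpleMatrix;

public class RandomTensorSketch {
  public static void main(String[] args) {
    int numHid = 25;
    double range = 1.0 / (4.0 * numHid);  // 0.01 for numHid = 25

    // A (2h x 2h x h) tensor represented as h slices of size 2h x 2h.
    Random rand = new Random(42);
    SimpleMatrix[] slices = new SimpleMatrix[numHid];
    for (int k = 0; k < numHid; ++k) {
      slices[k] = SimpleMatrix.random_DDRM(2 * numHid, 2 * numHid, -range, range, rand);
    }
    System.out.println("slices: " + slices.length + ", per-entry init range: +/-" + range);
  }
}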
Use of edu.stanford.nlp.neural.SimpleTensor in project CoreNLP by stanfordnlp.
The class ConvertMatlabModel, method main.
public static void main(String[] args) throws IOException {
  String basePath = "/user/socherr/scr/projects/semComp/RNTN/src/params/";
  int numSlices = 25;
  boolean useEscapedParens = false;
  for (int argIndex = 0; argIndex < args.length; ) {
    if (args[argIndex].equalsIgnoreCase("-slices")) {
      numSlices = Integer.parseInt(args[argIndex + 1]);
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-path")) {
      basePath = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-useEscapedParens")) {
      useEscapedParens = true;
      argIndex += 1;
    } else {
      log.info("Unknown argument " + args[argIndex]);
      System.exit(2);
    }
  }

  SimpleMatrix[] slices = new SimpleMatrix[numSlices];
  for (int i = 0; i < numSlices; ++i) {
    slices[i] = loadMatrix(basePath + "bin/Wt_" + (i + 1) + ".bin", basePath + "Wt_" + (i + 1) + ".txt");
  }
  SimpleTensor tensor = new SimpleTensor(slices);
  log.info("W tensor size: " + tensor.numRows() + "x" + tensor.numCols() + "x" + tensor.numSlices());

  SimpleMatrix W = loadMatrix(basePath + "bin/W.bin", basePath + "W.txt");
  log.info("W matrix size: " + W.numRows() + "x" + W.numCols());

  SimpleMatrix Wcat = loadMatrix(basePath + "bin/Wcat.bin", basePath + "Wcat.txt");
  log.info("W cat size: " + Wcat.numRows() + "x" + Wcat.numCols());

  SimpleMatrix combinedWV = loadMatrix(basePath + "bin/Wv.bin", basePath + "Wv.txt");
  log.info("Word matrix size: " + combinedWV.numRows() + "x" + combinedWV.numCols());

  File vocabFile = new File(basePath + "vocab_1.txt");
  if (!vocabFile.exists()) {
    vocabFile = new File(basePath + "words.txt");
  }
  List<String> lines = Generics.newArrayList();
  for (String line : IOUtils.readLines(vocabFile)) {
    lines.add(line.trim());
  }
  log.info("Lines in vocab file: " + lines.size());

  Map<String, SimpleMatrix> wordVectors = Generics.newTreeMap();
  for (int i = 0; i < lines.size() && i < combinedWV.numCols(); ++i) {
    String[] pieces = lines.get(i).split(" +");
    if (pieces.length != 1) {
      // skip blank lines and multi-token lines
      continue;
    }
    wordVectors.put(pieces[0], combinedWV.extractMatrix(0, numSlices, i, i + 1));
    if (pieces[0].equals("UNK")) {
      wordVectors.put(SentimentModel.UNKNOWN_WORD, wordVectors.get("UNK"));
    }
  }
  // If there is no ",", we first try to look for an HTML escaping,
  // then fall back to "." as better than just a random word vector.
  // Same for "``" and ";"
  copyWordVector(wordVectors, "&#44;", ",");
  copyWordVector(wordVectors, ".", ",");
  copyWordVector(wordVectors, "&#59;", ";");
  copyWordVector(wordVectors, ".", ";");
  copyWordVector(wordVectors, "&#96;&#96;", "``");
  copyWordVector(wordVectors, "''", "``");
  if (useEscapedParens) {
    replaceWordVector(wordVectors, "(", "-LRB-");
    replaceWordVector(wordVectors, ")", "-RRB-");
  }

  RNNOptions op = new RNNOptions();
  op.numHid = numSlices;
  op.lowercaseWordVectors = false;

  if (Wcat.numRows() == 2) {
    op.classNames = new String[] { "Negative", "Positive" };
    // TODO: set to null once old models are updated
    op.equivalenceClasses = new int[][] { { 0 }, { 1 } };
    op.numClasses = 2;
  }

  if (!wordVectors.containsKey(SentimentModel.UNKNOWN_WORD)) {
    wordVectors.put(SentimentModel.UNKNOWN_WORD, SimpleMatrix.random_DDRM(numSlices, 1, -0.00001, 0.00001, new Random()));
  }

  SentimentModel model = SentimentModel.modelFromMatrices(W, Wcat, tensor, wordVectors, op);
  model.saveSerialized("matlab.ser.gz");
}
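Given the argument parsing above, a typical invocation would look like the following (the path is illustrative, and the package edu.stanford.nlp.sentiment is assumed from CoreNLP's layout); it writes matlab.ser.gz to the working directory:

java edu.stanford.nlp.sentiment.ConvertMatlabModel -path /path/to/params/ -slices 25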
Use of edu.stanford.nlp.neural.SimpleTensor in project CoreNLP by stanfordnlp.
The class SentimentCostAndGradient, method getTensorGradient.
private static SimpleTensor getTensorGradient(SimpleMatrix deltaFull, SimpleMatrix leftVector, SimpleMatrix rightVector) {
  int size = deltaFull.getNumElements();
  SimpleTensor Wt_df = new SimpleTensor(size * 2, size * 2, size);
  // TODO: combine this concatenation with computeTensorDeltaDown?
  SimpleMatrix fullVector = NeuralUtils.concatenate(leftVector, rightVector);
  for (int slice = 0; slice < size; ++slice) {
    Wt_df.setSlice(slice, fullVector.scale(deltaFull.get(slice)).mult(fullVector.transpose()));
  }
  return Wt_df;
}
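Each slice of the returned gradient is the outer product of the concatenated children with itself, scaled by that slice's delta; this follows from differentiating out[k] = x' T[k] x with respect to T[k]. A quick numeric check of that identity, assuming only EJML (illustrative code, not part of CoreNLP):

import org.ejml.simple.SimpleMatrix;

public class TensorGradientSketch {
  public static void main(String[] args) {
    // Concatenated child vector x = [a; b] of length 4 and one slice's delta.
    SimpleMatrix x = new SimpleMatrix(4, 1, true, new double[] {1.0, -2.0, 0.5, 3.0});
    double deltaK = 0.25;

    // Gradient of delta[k] * (x' T[k] x) with respect to T[k]: delta[k] * x x'.
    SimpleMatrix gradSlice = x.scale(deltaK).mult(x.transpose());

    // Spot check on entry T[k](0, 1): d(out[k])/dT[k](0,1) = x[0] * x[1],
    // so the gradient entry should be delta[k] * x[0] * x[1] = -0.5.
    double expected = deltaK * x.get(0) * x.get(1);
    System.out.println(gradSlice.get(0, 1) + " == " + expected);
  }
}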