Use of edu.stanford.nlp.util.TwoDimensionalMap in project CoreNLP by stanfordnlp.
The class ConvertModels, method readParser.
public static LexicalizedParser readParser(ObjectInputStream in) throws IOException, ClassNotFoundException {
  LexicalizedParser model = ErasureUtils.uncheckedCast(in.readObject());
  Function<List<List<Double>>, SimpleMatrix> f = (x) -> toMatrix(x);
  TwoDimensionalMap<String, String, List<List<Double>>> map2dSM = ErasureUtils.uncheckedCast(in.readObject());
  TwoDimensionalMap<String, String, SimpleMatrix> binaryTransform = transform2DMap(map2dSM, f);
  Map<String, List<List<Double>>> map = ErasureUtils.uncheckedCast(in.readObject());
  Map<String, SimpleMatrix> unaryTransform = transformMap(map, f);
  map2dSM = ErasureUtils.uncheckedCast(in.readObject());
  TwoDimensionalMap<String, String, SimpleMatrix> binaryScore = transform2DMap(map2dSM, f);
  map = ErasureUtils.uncheckedCast(in.readObject());
  Map<String, SimpleMatrix> unaryScore = transformMap(map, f);
  map = ErasureUtils.uncheckedCast(in.readObject());
  Map<String, SimpleMatrix> wordVectors = transformMap(map, f);
  DVModel dvModel = new DVModel(binaryTransform, unaryTransform, binaryScore, unaryScore, wordVectors, model.getOp());
  DVModelReranker reranker = new DVModelReranker(dvModel);
  model.reranker = reranker;
  return model;
}
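The conversion leans on helpers defined elsewhere in ConvertModels (toMatrix, transformMap, transform2DMap) that rebuild SimpleMatrix objects from the nested List<List<Double>> representation. A minimal sketch of what the two-dimensional variant presumably does, written generically here (the actual helper in ConvertModels may differ in name, signature, or choice of map factory):

// Assumed helper: apply a conversion function to every value of a
// TwoDimensionalMap while keeping the same pair of keys.
static <K1, K2, V, V2> TwoDimensionalMap<K1, K2, V2> transform2DMap(TwoDimensionalMap<K1, K2, V> in, Function<V, V2> f) {
  TwoDimensionalMap<K1, K2, V2> out = TwoDimensionalMap.hashMap();
  for (TwoDimensionalMap.Entry<K1, K2, V> entry : in) {
    out.put(entry.getFirstKey(), entry.getSecondKey(), f.apply(entry.getValue()));
  }
  return out;
}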
Use of edu.stanford.nlp.util.TwoDimensionalMap in project CoreNLP by stanfordnlp.
The class SplittingGrammarExtractor, method mergeStates.
public void mergeStates() {
  if (op.trainOptions.splitRecombineRate <= 0.0) {
    return;
  }
  // we go through the machinery to sum up the temporary betas,
  // counting the total mass
  TwoDimensionalMap<String, String, double[][]> tempUnaryBetas = new TwoDimensionalMap<>();
  ThreeDimensionalMap<String, String, String, double[][][]> tempBinaryBetas = new ThreeDimensionalMap<>();
  Map<String, double[]> totalStateMass = Generics.newHashMap();
  recalculateTemporaryBetas(false, totalStateMass, tempUnaryBetas, tempBinaryBetas);
  // Next, for each tree we count the effect of merging its
  // annotations. We only consider the most recently split
  // annotations as candidates for merging.
  Map<String, double[]> deltaAnnotations = Generics.newHashMap();
  for (Tree tree : trees) {
    countMergeEffects(tree, totalStateMass, deltaAnnotations);
  }
  // Now we have a map of the (approximate) likelihood loss from
  // merging each state. We merge the ones that provide the least
  // benefit, up to the splitRecombineRate.
  List<Triple<String, Integer, Double>> sortedDeltas = new ArrayList<>();
  for (String state : deltaAnnotations.keySet()) {
    double[] scores = deltaAnnotations.get(state);
    for (int i = 0; i < scores.length; ++i) {
      sortedDeltas.add(new Triple<>(state, i * 2, scores[i]));
    }
  }
  Collections.sort(sortedDeltas, new Comparator<Triple<String, Integer, Double>>() {
    public int compare(Triple<String, Integer, Double> first, Triple<String, Integer, Double> second) {
      // "backwards", sorting from high to low.
      return Double.compare(second.third(), first.third());
    }

    public boolean equals(Object o) {
      return o == this;
    }
  });
  // for (Triple<String, Integer, Double> delta : sortedDeltas) {
  //   System.out.println(delta.first() + "-" + delta.second() + ": " + delta.third());
  // }
  // System.out.println("-------------");
  // Only merge a fraction of the splits based on what the user
  // originally asked for
  int splitsToMerge = (int) (sortedDeltas.size() * op.trainOptions.splitRecombineRate);
  splitsToMerge = Math.max(0, splitsToMerge);
  splitsToMerge = Math.min(sortedDeltas.size() - 1, splitsToMerge);
  sortedDeltas = sortedDeltas.subList(0, splitsToMerge);
  System.out.println();
  System.out.println(sortedDeltas);
  Map<String, int[]> mergeCorrespondence = buildMergeCorrespondence(sortedDeltas);
  recalculateMergedBetas(mergeCorrespondence);
  for (Triple<String, Integer, Double> delta : sortedDeltas) {
    stateSplitCounts.decrementCount(delta.first(), 1);
  }
}
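The anonymous Comparator orders the candidate merges by their delta from high to low, so the subList prefix keeps the candidates at the favorable end of the ranking, which per the comments above are the splits whose merging costs the least. On Java 8+ the same ordering could be written more compactly; this is only a stylistic alternative, not the code in SplittingGrammarExtractor:

sortedDeltas.sort(Comparator.comparingDouble((Triple<String, Integer, Double> t) -> t.third()).reversed());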
Use of edu.stanford.nlp.util.TwoDimensionalMap in project CoreNLP by stanfordnlp.
The class DVParserCostAndGradient, method calculate.
// fill value & derivative
public void calculate(double[] theta) {
  dvModel.vectorToParams(theta);
  double localValue = 0.0;
  double[] localDerivative = new double[theta.length];
  TwoDimensionalMap<String, String, SimpleMatrix> binaryW_dfsG, binaryW_dfsB;
  binaryW_dfsG = TwoDimensionalMap.treeMap();
  binaryW_dfsB = TwoDimensionalMap.treeMap();
  TwoDimensionalMap<String, String, SimpleMatrix> binaryScoreDerivativesG, binaryScoreDerivativesB;
  binaryScoreDerivativesG = TwoDimensionalMap.treeMap();
  binaryScoreDerivativesB = TwoDimensionalMap.treeMap();
  Map<String, SimpleMatrix> unaryW_dfsG, unaryW_dfsB;
  unaryW_dfsG = new TreeMap<>();
  unaryW_dfsB = new TreeMap<>();
  Map<String, SimpleMatrix> unaryScoreDerivativesG, unaryScoreDerivativesB;
  unaryScoreDerivativesG = new TreeMap<>();
  unaryScoreDerivativesB = new TreeMap<>();
  Map<String, SimpleMatrix> wordVectorDerivativesG = new TreeMap<>();
  Map<String, SimpleMatrix> wordVectorDerivativesB = new TreeMap<>();
  for (TwoDimensionalMap.Entry<String, String, SimpleMatrix> entry : dvModel.binaryTransform) {
    int numRows = entry.getValue().numRows();
    int numCols = entry.getValue().numCols();
    binaryW_dfsG.put(entry.getFirstKey(), entry.getSecondKey(), new SimpleMatrix(numRows, numCols));
    binaryW_dfsB.put(entry.getFirstKey(), entry.getSecondKey(), new SimpleMatrix(numRows, numCols));
    binaryScoreDerivativesG.put(entry.getFirstKey(), entry.getSecondKey(), new SimpleMatrix(1, numRows));
    binaryScoreDerivativesB.put(entry.getFirstKey(), entry.getSecondKey(), new SimpleMatrix(1, numRows));
  }
  for (Map.Entry<String, SimpleMatrix> entry : dvModel.unaryTransform.entrySet()) {
    int numRows = entry.getValue().numRows();
    int numCols = entry.getValue().numCols();
    unaryW_dfsG.put(entry.getKey(), new SimpleMatrix(numRows, numCols));
    unaryW_dfsB.put(entry.getKey(), new SimpleMatrix(numRows, numCols));
    unaryScoreDerivativesG.put(entry.getKey(), new SimpleMatrix(1, numRows));
    unaryScoreDerivativesB.put(entry.getKey(), new SimpleMatrix(1, numRows));
  }
  if (op.trainOptions.trainWordVectors) {
    for (Map.Entry<String, SimpleMatrix> entry : dvModel.wordVectors.entrySet()) {
      int numRows = entry.getValue().numRows();
      int numCols = entry.getValue().numCols();
      wordVectorDerivativesG.put(entry.getKey(), new SimpleMatrix(numRows, numCols));
      wordVectorDerivativesB.put(entry.getKey(), new SimpleMatrix(numRows, numCols));
    }
  }
  // Some optimization methods print out a line without an end, so our
  // debugging statements are misaligned
  Timing scoreTiming = new Timing();
  scoreTiming.doing("Scoring trees");
  int treeNum = 0;
  MulticoreWrapper<Tree, Pair<DeepTree, DeepTree>> wrapper = new MulticoreWrapper<>(op.trainOptions.trainingThreads, new ScoringProcessor());
  for (Tree tree : trainingBatch) {
    wrapper.put(tree);
  }
  wrapper.join();
  scoreTiming.done();
  while (wrapper.peek()) {
    Pair<DeepTree, DeepTree> result = wrapper.poll();
    DeepTree goldTree = result.first;
    DeepTree bestTree = result.second;
    StringBuilder treeDebugLine = new StringBuilder();
    Formatter formatter = new Formatter(treeDebugLine);
    boolean isDone = (Math.abs(bestTree.getScore() - goldTree.getScore()) <= 0.00001 || goldTree.getScore() > bestTree.getScore());
    String done = isDone ? "done" : "";
    formatter.format("Tree %6d Highest tree: %12.4f Correct tree: %12.4f %s", treeNum, bestTree.getScore(), goldTree.getScore(), done);
    log.info(treeDebugLine.toString());
    if (!isDone) {
      // if the gold tree is better than the best hypothesis tree by
      // a large enough margin, then the score difference will be 0
      // and we ignore the tree
      double valueDelta = bestTree.getScore() - goldTree.getScore();
      // double valueDelta = Math.max(0.0, - scoreGold + bestScore);
      localValue += valueDelta;
      // get the context words for this tree - should be the same
      // for either goldTree or bestTree
      List<String> words = getContextWords(goldTree.getTree());
      // The derivatives affected by this tree are only based on the
      // nodes present in this tree, eg not all matrix derivatives
      // will be affected by this tree
      backpropDerivative(goldTree.getTree(), words, goldTree.getVectors(), binaryW_dfsG, unaryW_dfsG, binaryScoreDerivativesG, unaryScoreDerivativesG, wordVectorDerivativesG);
      backpropDerivative(bestTree.getTree(), words, bestTree.getVectors(), binaryW_dfsB, unaryW_dfsB, binaryScoreDerivativesB, unaryScoreDerivativesB, wordVectorDerivativesB);
    }
    ++treeNum;
  }
  double[] localDerivativeGood;
  double[] localDerivativeB;
  if (op.trainOptions.trainWordVectors) {
    localDerivativeGood = NeuralUtils.paramsToVector(theta.length, binaryW_dfsG.valueIterator(), unaryW_dfsG.values().iterator(), binaryScoreDerivativesG.valueIterator(), unaryScoreDerivativesG.values().iterator(), wordVectorDerivativesG.values().iterator());
    localDerivativeB = NeuralUtils.paramsToVector(theta.length, binaryW_dfsB.valueIterator(), unaryW_dfsB.values().iterator(), binaryScoreDerivativesB.valueIterator(), unaryScoreDerivativesB.values().iterator(), wordVectorDerivativesB.values().iterator());
  } else {
    localDerivativeGood = NeuralUtils.paramsToVector(theta.length, binaryW_dfsG.valueIterator(), unaryW_dfsG.values().iterator(), binaryScoreDerivativesG.valueIterator(), unaryScoreDerivativesG.values().iterator());
    localDerivativeB = NeuralUtils.paramsToVector(theta.length, binaryW_dfsB.valueIterator(), unaryW_dfsB.values().iterator(), binaryScoreDerivativesB.valueIterator(), unaryScoreDerivativesB.values().iterator());
  }
  // gradient of (highest - correct), matching the valueDelta added to the value above
  for (int i = 0; i < localDerivativeGood.length; i++) {
    localDerivative[i] = localDerivativeB[i] - localDerivativeGood[i];
  }
  // TODO: this is where we would combine multiple costs if we had parallelized the calculation
  value = localValue;
  derivative = localDerivative;
  // normalizing by training batch size
  value = (1.0 / trainingBatch.size()) * value;
  ArrayMath.multiplyInPlace(derivative, (1.0 / trainingBatch.size()));
  // add regularization to cost:
  double[] currentParams = dvModel.paramsToVector();
  double regCost = 0;
  for (double currentParam : currentParams) {
    regCost += currentParam * currentParam;
  }
  regCost = op.trainOptions.regCost * 0.5 * regCost;
  value += regCost;
  // add regularization to gradient
  ArrayMath.multiplyInPlace(currentParams, op.trainOptions.regCost);
  ArrayMath.pairwiseAddInPlace(derivative, currentParams);
}
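Read end to end, calculate assembles a margin-style objective over the training batch B plus an L2 penalty that is not divided by the batch size. Writing s(t) for the model score of tree t and lambda for op.trainOptions.regCost, the value and derivative filled in above amount to (a summary of the code, not a formula quoted from CoreNLP):

\[
\text{value} = \frac{1}{|B|} \sum_{t \in B,\ \neg\text{done}(t)} \bigl( s(\text{best}_t) - s(\text{gold}_t) \bigr) + \frac{\lambda}{2} \lVert \theta \rVert^2,
\qquad
\text{derivative} = \frac{1}{|B|} \sum_{t \in B,\ \neg\text{done}(t)} \bigl( \nabla_\theta s(\text{best}_t) - \nabla_\theta s(\text{gold}_t) \bigr) + \lambda\, \theta .
\]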
Use of edu.stanford.nlp.util.TwoDimensionalMap in project CoreNLP by stanfordnlp.
The class SentimentCostAndGradient, method scaleAndRegularize.
private static double scaleAndRegularize(TwoDimensionalMap<String, String, SimpleMatrix> derivatives, TwoDimensionalMap<String, String, SimpleMatrix> currentMatrices, double scale, double regCost, boolean dropBiasColumn) {
  // the regularization cost
  double cost = 0.0;
  for (TwoDimensionalMap.Entry<String, String, SimpleMatrix> entry : currentMatrices) {
    SimpleMatrix D = derivatives.get(entry.getFirstKey(), entry.getSecondKey());
    SimpleMatrix regMatrix = entry.getValue();
    if (dropBiasColumn) {
      regMatrix = new SimpleMatrix(regMatrix);
      regMatrix.insertIntoThis(0, regMatrix.numCols() - 1, new SimpleMatrix(regMatrix.numRows(), 1));
    }
    D = D.scale(scale).plus(regMatrix.scale(regCost));
    derivatives.put(entry.getFirstKey(), entry.getSecondKey(), D);
    cost += regMatrix.elementMult(regMatrix).elementSum() * regCost / 2.0;
  }
  return cost;
}
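When dropBiasColumn is true, the copied regMatrix has its last column overwritten with zeros before it is used, so the bias column of each transform matrix contributes neither to the returned penalty nor to the regCost term mixed into the derivative. The same scale-and-regularize pattern applies to single-key parameter maps as well; a minimal sketch of that unary analogue, written here against java.util.Map rather than copied from SentimentCostAndGradient (the real method's name and signature may differ):

private static double scaleAndRegularize(Map<String, SimpleMatrix> derivatives, Map<String, SimpleMatrix> currentMatrices, double scale, double regCost) {
  // the accumulated regularization cost
  double cost = 0.0;
  for (Map.Entry<String, SimpleMatrix> entry : currentMatrices.entrySet()) {
    SimpleMatrix D = derivatives.get(entry.getKey());
    D = D.scale(scale).plus(entry.getValue().scale(regCost));
    derivatives.put(entry.getKey(), D);
    cost += entry.getValue().elementMult(entry.getValue()).elementSum() * regCost / 2.0;
  }
  return cost;
}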
Use of edu.stanford.nlp.util.TwoDimensionalMap in project CoreNLP by stanfordnlp.
The class SentimentCostAndGradient, method scaleAndRegularizeTensor.
private static double scaleAndRegularizeTensor(TwoDimensionalMap<String, String, SimpleTensor> derivatives, TwoDimensionalMap<String, String, SimpleTensor> currentMatrices, double scale, double regCost) {
  // the regularization cost
  double cost = 0.0;
  for (TwoDimensionalMap.Entry<String, String, SimpleTensor> entry : currentMatrices) {
    SimpleTensor D = derivatives.get(entry.getFirstKey(), entry.getSecondKey());
    D = D.scale(scale).plus(entry.getValue().scale(regCost));
    derivatives.put(entry.getFirstKey(), entry.getSecondKey(), D);
    cost += entry.getValue().elementMult(entry.getValue()).elementSum() * regCost / 2.0;
  }
  return cost;
}
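Across all of these snippets, TwoDimensionalMap is used through a small API surface: the treeMap()/hashMap() factories, put and get keyed on a pair of Strings, iteration over TwoDimensionalMap.Entry, and valueIterator() for walking the values only. A self-contained sketch of those operations (the keys and matrix sizes are placeholders, not values from CoreNLP):

import edu.stanford.nlp.util.TwoDimensionalMap;
import org.ejml.simple.SimpleMatrix;
import java.util.Iterator;

public class TwoDimensionalMapSketch {
  public static void main(String[] args) {
    // Backed by TreeMaps, so iteration follows the natural ordering of the keys.
    TwoDimensionalMap<String, String, SimpleMatrix> weights = TwoDimensionalMap.treeMap();
    weights.put("DT", "NN", new SimpleMatrix(2, 5));
    weights.put("NP", "VP", new SimpleMatrix(2, 5));

    // Lookup by the pair of keys, as in scaleAndRegularize above.
    SimpleMatrix npVp = weights.get("NP", "VP");
    System.out.println("NP,VP rows: " + npVp.numRows());

    // Iterate over entries, as dvModel.binaryTransform is iterated in calculate.
    for (TwoDimensionalMap.Entry<String, String, SimpleMatrix> entry : weights) {
      System.out.println(entry.getFirstKey() + "," + entry.getSecondKey() + " cols: " + entry.getValue().numCols());
    }

    // Iterate over values only, as in the NeuralUtils.paramsToVector calls.
    Iterator<SimpleMatrix> it = weights.valueIterator();
    while (it.hasNext()) {
      System.out.println("value elements: " + it.next().getNumElements());
    }
  }
}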