use of edu.illinois.cs.cogcomp.depparse.core.DepInst in project cogcomp-nlp by CogComp.
the class DepAnnotator method addView.
/**
 * Runs the dependency parser on {@code ta} and attaches the result as the
 * {@code ViewNames.DEPENDENCY} view.
 *
 * @param ta the text annotation to parse; it must already contain every view listed in
 *        {@code requiredViews}
 * @throws AnnotatorException if a required view is missing, or if the inference solver fails
 *         (the original failure is attached as the cause)
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    for (String reqView : requiredViews) {
        if (!ta.hasView(reqView)) {
            throw new AnnotatorException("TextAnnotation must have view: " + reqView);
        }
    }
    DepInst sent = new DepInst(ta);
    DepStruct deptree;
    try {
        deptree = (DepStruct) model.infSolver.getBestStructure(model.wv, sent);
    } catch (Exception e) {
        // Keep the underlying failure reachable via getCause() instead of discarding it.
        // NOTE(review): assumes AnnotatorException(String) does not already set a cause.
        AnnotatorException failure = new AnnotatorException("Sentence cannot be parsed");
        failure.initCause(e);
        throw failure;
    }
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY, ta);
    int rootPos = findRoot(deptree);
    // All the node positions are -1 to account for the extra <root> node added
    Pair<String, Integer> nodePair = new Pair<>(sent.forms[rootPos], rootPos - 1);
    Tree<Pair<String, Integer>> tree = new Tree<>(nodePair);
    populateChildren(tree, deptree, sent, rootPos);
    treeView.setDependencyTree(0, tree);
    ta.addView(ViewNames.DEPENDENCY, treeView);
}
use of edu.illinois.cs.cogcomp.depparse.core.DepInst in project cogcomp-nlp by CogComp.
the class MainClass method getStructuredData.
/**
 * Reads every dependency instance from the CoNLL file at {@code filepath} and collects them
 * into a structured-learning problem.
 *
 * Each instance is first passed to {@code infSolver.updateInferenceSolver} and then converted
 * to an instance/structure pair via {@code getSLPair}.
 *
 * @param filepath path handed to the CoNLL reader
 * @param infSolver decoder that is updated with every instance read
 * @return the problem holding one example per instance read
 * @throws Exception if reading or converting an instance fails
 */
private static SLProblem getStructuredData(String filepath, LabeledChuLiuEdmondsDecoder infSolver) throws Exception {
    CONLLReader reader = new CONLLReader(new Preprocessor(), useGoldPOS, conllIndexOffset);
    reader.startReading(filepath);
    SLProblem dataset = new SLProblem();
    // getNext() returns null once the reader has no more instances.
    for (DepInst sentence = reader.getNext(); sentence != null; sentence = reader.getNext()) {
        infSolver.updateInferenceSolver(sentence);
        Pair<IInstance, IStructure> example = getSLPair(sentence);
        dataset.addExample(example.getFirst(), example.getSecond());
    }
    logger.info("{} of dependency instances.", dataset.size());
    return dataset;
}
use of edu.illinois.cs.cogcomp.depparse.core.DepInst in project cogcomp-nlp by CogComp.
the class LabeledDepFeatureGenerator method getFeatureVector.
/**
 * Extracts the feature vector for a sentence paired with a dependency structure.
 *
 * @param x the instance; cast to {@code DepInst}
 * @param y the structure; cast to {@code DepStruct}
 * @return the result of {@code extractFeatures} for the pair
 */
@Override
public IFeatureVector getFeatureVector(IInstance x, IStructure y) {
    return extractFeatures((DepInst) x, (DepStruct) y);
}
use of edu.illinois.cs.cogcomp.depparse.core.DepInst in project cogcomp-nlp by CogComp.
the class CONLLReader method getNext.
/**
 * Reads the next sentence from the underlying reader and builds a {@code DepInst} from it.
 *
 * A sentence is a run of consecutive lines terminated by an empty line, a line starting with
 * {@code "*"}, or end of input. Separator lines that precede the sentence are skipped, so
 * repeated blank or {@code "*"} lines are not mistaken for end of input.
 *
 * Index 0 of every array in the returned instance holds an artificial root entry; the tokens
 * read from the file occupy indices 1..length.
 *
 * @return the next instance, or {@code null} (after closing the reader) when no further
 *         sentence is available
 * @throws IOException if reading from or closing the underlying reader fails
 * @throws AnnotatorException declared for the preprocessor call
 * @throws EdisonException declared for the preprocessor call
 */
public DepInst getNext() throws IOException, AnnotatorException, EdisonException {
    String line = inputReader.readLine();
    // Skip leading separators; otherwise two separators in a row would yield an empty
    // sentence, close the reader and silently drop the rest of the file.
    while (line != null && (line.isEmpty() || line.startsWith("*"))) {
        line = inputReader.readLine();
    }
    ArrayList<String[]> lineList = new ArrayList<>();
    while (line != null && !line.isEmpty() && !line.startsWith("*")) {
        lineList.add(line.split("\\s+"));
        line = inputReader.readLine();
        // NOTE(review): sentId advances once per token line, not once per sentence.
        // Ids stay unique either way; confirm whether per-sentence ids were intended.
        sentId++;
    }
    int length = lineList.size();
    if (length == 0) {
        // Only reachable at end of input now that leading separators are skipped.
        inputReader.close();
        return null;
    }
    // +1 for the 0 root
    String[] forms = new String[length + 1];
    String[] lemmas = new String[length + 1];
    String[] pos = new String[length + 1];
    String[] chunks = new String[length + 1];
    String[] deprels = new String[length + 1];
    int[] heads = new int[length + 1];
    // Placeholder values for the artificial root at index 0.
    forms[0] = "<root>";
    lemmas[0] = "<root>";
    pos[0] = "<root-POS>";
    chunks[0] = "<root-CHUNK>";
    deprels[0] = "<no-type>";
    heads[0] = -1;
    // Columns taken directly from the file.
    for (int i = 0; i < length; i++) {
        String[] info = lineList.get(i);
        forms[i + 1] = normalize(info[FORM_INDEX]);
        pos[i + 1] = info[POS_INDEX];
        deprels[i + 1] = info[DEP_INDEX];
        heads[i + 1] = Integer.parseInt(info[HEAD_INDEX]);
    }
    TextAnnotation annotation = preprocessor.annotate(CORPUS_ID, String.valueOf(sentId), forms);
    SpanLabelView lemmaView = (SpanLabelView) annotation.getView(ViewNames.LEMMA);
    SpanLabelView posView = (SpanLabelView) annotation.getView(ViewNames.POS);
    SpanLabelView chunkView = (SpanLabelView) annotation.getView(ViewNames.SHALLOW_PARSE);
    // Lemmas and chunks always come from the preprocessor's views; the POS read from the file
    // is replaced by the POS view's label unless useGoldPOS is set.
    for (int i = 0; i < length; i++) {
        lemmas[i + 1] = lemmaView.getLabel(i);
        if (!useGoldPOS) {
            pos[i + 1] = posView.getLabel(i);
        }
        chunks[i + 1] = chunkView.getLabel(i);
    }
    return new DepInst(forms, lemmas, pos, chunks, deprels, heads);
}
use of edu.illinois.cs.cogcomp.depparse.core.DepInst in project cogcomp-nlp by CogComp.
the class MainClass method test.
/**
 * Loads a trained model, parses every sentence of the test data and prints timing and
 * accuracy statistics to standard output.
 *
 * Three scores are accumulated per sentence through {@code evaluate}: undirected, directed
 * unlabeled, and labeled. Averages are computed in floating point and reported as 0 when the
 * test set is empty, so an empty data set no longer raises {@code ArithmeticException}.
 *
 * @param modelPath path of the serialized model to load
 * @param testDataPath path of the test data handed to {@code getStructuredData}
 * @param updateMatrix passed to the labeled evaluation; when true, {@code printMatrix()} is
 *        also called after the statistics are printed
 * @throws Exception if loading the model, reading the data or inference fails
 */
private static void test(String modelPath, String testDataPath, boolean updateMatrix) throws Exception {
    SLModel model = SLModel.loadModel(modelPath);
    ((LabeledChuLiuEdmondsDecoder) model.infSolver).loadDepRelDict();
    SLProblem sp = getStructuredData(testDataPath, (LabeledChuLiuEdmondsDecoder) model.infSolver);
    double accUndirected = 0.0;
    double accDirectedUnlabeled = 0.0;
    double accLabeled = 0.0;
    double total = 0.0;
    long totalTime = 0L;
    int totalLength = 0;
    for (int i = 0; i < sp.instanceList.size(); i++) {
        DepInst sent = (DepInst) sp.instanceList.get(i);
        totalLength += sent.size();
        DepStruct gold = (DepStruct) sp.goldStructureList.get(i);
        // Only the inference call itself is timed.
        long startTime = System.currentTimeMillis();
        DepStruct prediction = (DepStruct) model.infSolver.getBestStructure(model.wv, sent);
        totalTime += (System.currentTimeMillis() - startTime);
        IntPair undirected = evaluate(sent, gold, prediction, false, false, false);
        IntPair directedUnlabeled = evaluate(sent, gold, prediction, true, false, false);
        IntPair labeled = evaluate(sent, gold, prediction, true, true, updateMatrix);
        accUndirected += undirected.getFirst();
        accDirectedUnlabeled += directedUnlabeled.getFirst();
        accLabeled += labeled.getFirst();
        total += directedUnlabeled.getSecond();
    }
    int numSentences = sp.size();
    // Floating-point averages: integer division truncated them and threw on an empty test set.
    double avgLength = numSentences == 0 ? 0.0 : (double) totalLength / numSentences;
    double avgTime = numSentences == 0 ? 0.0 : (double) totalTime / numSentences;
    System.out.println("Parsing time taken for " + numSentences + " sentences with average length " + avgLength + ": " + totalTime);
    System.out.println("Average parsing time " + avgTime);
    System.out.println("undirected acc " + accUndirected);
    System.out.println("directed unlabeled acc " + accDirectedUnlabeled);
    System.out.println("labeled acc " + accLabeled);
    System.out.println("total " + total);
    // These are double divisions, so a zero total prints NaN rather than throwing.
    System.out.println("%age correct undirected " + (accUndirected / total));
    System.out.println("%age correct directed & unlabeled " + (accDirectedUnlabeled / total));
    System.out.println("%age correct labeled " + (accLabeled / total));
    if (updateMatrix) {
        printMatrix();
    }
    System.out.println("Done with testing!");
}
Aggregations