Search in sources :

Example 31 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class JsonSerializerTest method testJsonSerializabilityWithOffsets.

@Test
public void testJsonSerializabilityWithOffsets() throws Exception {
    // ask the serializer to also emit the (redundant) token offset info for non-CCG readers
    final boolean includeTokenOffsets = true;
    TextAnnotation annotated = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(false, 3);
    // a view registered as null (possibly by mistake) must not make serialization fail
    annotated.addView("nullView", null);
    String serialized = SerializationHelper.serializeToJson(annotated, includeTokenOffsets);
    // check the JSON string against the annotation it was produced from
    JsonSerializerTest.verifyDeserializedJsonString(serialized, annotated);
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Test(org.junit.Test)

Example 32 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class PennTreebankReader method next.

/**
 * Returns the next annotation object, loading the next file (and, when the current section's
 * files are used up, the next section) once the buffered lines are exhausted.
 *
 * @return an annotation object, or {@code null} when there are no more lines, files or
 *         sections to read.
 * @throws IllegalStateException if {@code updateCurrentFiles()} fails, if the next file cannot
 *         be found, or if {@code findNextTree()} reports an {@link AnnotatorException}.
 */
@Override
public TextAnnotation next() {
    // first check if we don't have any more lines
    if (lines == null || currentLineId == lines.size()) {
        // check if the current section has no more files
        if (currentFileId == currentSectionFiles.length) {
            // check if there are more sections
            if (currentSectionId == sections.length) {
                return null;
            }
            try {
                updateCurrentFiles();
            } catch (Exception e) {
                // Continuing here would index into a stale file list; fail loudly instead.
                throw new IllegalStateException("Unable to update the file list for the next section", e);
            }
            currentFileId = 0;
        }
        // NOTE(review): indexing with currentSectionId - 1 presumably relies on
        // updateCurrentFiles() having advanced currentSectionId -- confirm.
        String path = combinedWSJHome + "/" + sections[currentSectionId - 1] + "/" + currentSectionFiles[currentFileId++];
        try {
            lines = LineIO.read(path);
        } catch (FileNotFoundException e) {
            // Swallowing this would leave 'lines' null or pointing at the previous file's
            // content, which findNextTree() would then read again from index 0.
            throw new IllegalStateException("Treebank file not found: " + path, e);
        }
        treeInFile = 0;
        currentLineId = 0;
    }
    try {
        return findNextTree();
    } catch (AnnotatorException e) {
        // the cause carries the stack trace; no need to print it separately
        throw new IllegalStateException(e);
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 33 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class PennTreebankReader method findNextTree.

/**
 * Consumes lines starting at {@code currentLineId} until the parentheses read so far are
 * balanced, parses the collected text as a tree and wraps it in a TextAnnotation carrying a
 * parse view and a POS view derived from that parse.
 *
 * @return the annotation built from the next bracketed tree.
 * @throws AnnotatorException declared by this method; which of the calls below raises it is
 *         not visible here.
 */
private TextAnnotation findNextTree() throws AnnotatorException {
    StringBuilder bracketed = new StringBuilder();
    int depth = 0;
    boolean awaitingFirstLine = true;
    for (;;) {
        String current = lines.get(currentLineId++);
        // blank lines are skipped without affecting the balance check
        if (!current.isEmpty()) {
            if (awaitingFirstLine) {
                awaitingFirstLine = false;
                // insert TOP_LABEL right after the first opening parenthesis
                int afterParen = current.indexOf("(") + 1;
                current = current.substring(0, afterParen) + TOP_LABEL + current.substring(afterParen);
            }
            // net change in nesting contributed by this line
            int delta = 0;
            for (int i = 0; i < current.length(); i++) {
                char c = current.charAt(i);
                if (c == '(') {
                    delta++;
                } else if (c == ')') {
                    delta--;
                }
            }
            depth += delta;
            bracketed.append(current);
            if (depth == 0) {
                break;
            }
        }
    }
    // un-escape "\/" into "/" before handing the text to the tree parser
    String treeText = bracketed.toString().replaceAll("\\\\/", "/");
    Tree<String> tree = TreeParserFactory.getStringTreeParser().parse(treeText);
    String[] text = ParseUtils.getTerminalStringSentence(tree);
    String section = sections[currentSectionId - 1];
    String fileName = currentSectionFiles[currentFileId - 1];
    String id = "wsj/" + section + "/" + fileName + ":" + treeInFile;
    treeInFile++;
    TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(PENN_TREEBANK_WSJ, id, Collections.singletonList(text));
    // attach the tree as the parse view of the single sentence
    TreeView parse = new TreeView(parseViewName, "PTB-GOLD", ta, 1.0);
    parse.setParseTree(0, tree);
    ta.addView(parseViewName, parse);
    // add a POS view computed from the parse view
    ta.addView(new POSFromParse(parseViewName));
    return ta;
}
Also used : TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) POSFromParse(edu.illinois.cs.cogcomp.nlp.utilities.POSFromParse)

Example 34 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class NETagPlain method tagData.

/**
 * Reads the data at {@code inputPath}, annotates it, and writes the tagged result under
 * {@code outputPath} in a layout that depends on {@code dataFormat}.
 *
 * Does this assume that init() has been called already?
 *
 * @param inputPath file or directory to read; for {@code "-json"} it is listed as a directory.
 * @param outputPath directory the output files are written into.
 * @param dataFormat {@code "-c"} writes one {@code <docid>.txt} per document with a
 *        "form neLabel neTypeLevel1" line per word and an empty line after each sentence;
 *        {@code "-json"} re-reads the input TextAnnotations, merges the tagging into them and
 *        serializes each one to {@code outputPath/<id>}. NOTE(review): {@code "-plaintext"}
 *        is read and tagged, but then reaches the final else branch and throws.
 * @param params passed through to the reader, feature annotator and decoder.
 * @throws IllegalArgumentException if {@code dataFormat} is {@code "-json"} and
 *         {@code inputPath} cannot be listed as a directory.
 * @throws Exception if reading, tagging or writing fails, or the format is not supported.
 */
public static void tagData(String inputPath, String outputPath, String dataFormat, ParametersForLbjCode params) throws Exception {
    Data data;
    if (!dataFormat.equals("-plaintext")) {
        data = new Data(inputPath, inputPath, dataFormat, new String[] {}, new String[] {}, params);
    } else {
        // plaintext reading/writing.
        File f = new File(inputPath);
        Vector<String> inFiles = new Vector<>();
        // NOTE(review): outFiles is filled in but never read in this method.
        Vector<String> outFiles = new Vector<>();
        if (f.isDirectory()) {
            String[] files = f.list();
            for (String file : files) if (!file.startsWith(".")) {
                inFiles.addElement(inputPath + File.separator + file);
                outFiles.addElement(outputPath + File.separator + file);
            }
        } else {
            inFiles.addElement(inputPath);
            outFiles.addElement(outputPath);
        }
        data = new Data();
        for (int fileId = 0; fileId < inFiles.size(); fileId++) {
            logger.debug("Tagging file: " + inFiles.elementAt(fileId));
            ArrayList<LinkedVector> sentences = PlainTextReader.parsePlainTextFile(inFiles.elementAt(fileId), params);
            NERDocument doc = new NERDocument(sentences, "consoleInput");
            data.documents.add(doc);
        }
    }
    ExpressiveFeaturesAnnotator.annotate(data, params);
    Decoder.annotateDataBIO(data, params);
    if (dataFormat.equals("-c")) {
        for (int docid = 0; docid < data.documents.size(); docid++) {
            List<String> res = new ArrayList<>();
            ArrayList<LinkedVector> sentences = data.documents.get(docid).sentences;
            for (LinkedVector vector : sentences) {
                for (int j = 0; j < vector.size(); j++) {
                    NEWord w = (NEWord) vector.get(j);
                    res.add(w.form + " " + w.neLabel + " " + w.neTypeLevel1);
                }
                // empty line marks the end of a sentence
                res.add("");
            }
            LineIO.write(outputPath + "/" + docid + ".txt", res);
        }
    } else if (dataFormat.equals("-json")) {
        File inputfiles = new File(inputPath);
        String[] fileNames = inputfiles.list();
        if (fileNames == null) {
            // File.list() returns null when the path is not a directory or cannot be read
            throw new IllegalArgumentException("Cannot list input directory: " + inputPath);
        }
        List<TextAnnotation> tas = new ArrayList<>();
        for (String fileName : fileNames) {
            // File.list() yields bare names, so resolve each one against the input directory
            // instead of letting it be looked up relative to the working directory.
            TextAnnotation ta = SerializationHelper.deserializeTextAnnotationFromFile(inputPath + File.separator + fileName, true);
            tas.add(ta);
        }
        TextAnnotationConverter.Data2TextAnnotation(data, tas);
        for (TextAnnotation ta : tas) {
            SerializationHelper.serializeTextAnnotationToFile(ta, outputPath + "/" + ta.getId(), true);
        }
    } else {
        throw new NotImplementedException("We do not yet support dataFormat of " + dataFormat + " yet.");
    }
}
Also used : LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector) NotImplementedException(org.apache.commons.lang.NotImplementedException) ArrayList(java.util.ArrayList) List(java.util.List) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) File(java.io.File) Vector(java.util.Vector)

Example 35 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class Main method processInputString.

/**
 * Processes a single input string: HTML entities are unescaped, the text is turned into a
 * TextAnnotation, the NER view is rendered to an output string, and that string is published
 * through the result processor under a name made of the current time in milliseconds plus
 * ".txt". Where the published result ends up is decided by the result processor, which is
 * not visible here.
 *
 * @param data the string to process
 * @throws Exception if anything goes wrong.
 */
private void processInputString(String data) throws Exception {
    final String unescaped = StringEscapeUtils.unescapeHtml4(data);
    final TextAnnotation annotation = tab.createTextAnnotation(unescaped);
    final String rendered = this.produceOutput(this.nerAnnotator.getView(annotation), annotation);
    // the timestamp keeps the published name distinct per call (to millisecond resolution)
    this.getResultProcessor().publish(rendered, System.currentTimeMillis() + ".txt");
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)292 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)121 Test (org.junit.Test)84 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)60 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)48 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)40 ArrayList (java.util.ArrayList)33 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)32 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)28 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)27 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)24 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)22 IOException (java.io.IOException)22 LinkedHashSet (java.util.LinkedHashSet)21 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)20 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)19 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)18 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)18 File (java.io.File)18 XmlTextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation)16