use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class JsonSerializerTest method testJsonSerializabilityWithOffsets.
/**
 * Serializes a generated annotation to JSON with token offsets enabled and verifies the
 * deserialized result against the original annotation.
 *
 * @throws Exception if generation, serialization or verification fails.
 */
@Test
public void testJsonSerializabilityWithOffsets() throws Exception {
    TextAnnotation annotation = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(false, 3);

    // A view registered as null (possibly by mistake) must not make serialization fail.
    annotation.addView("nullView", null);

    // Passing 'true' emits the (redundant) token offset info meant for non-CCG readers.
    String serialized = SerializationHelper.serializeToJson(annotation, true);

    JsonSerializerTest.verifyDeserializedJsonString(serialized, annotation);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class PennTreebankReader method next.
/**
 * Returns the annotation for the next tree, loading the next file (and, once the current
 * section's files are used up, the next section's file list) when all lines of the current
 * file have been consumed.
 *
 * @return the next annotation object, or {@code null} when the current file and section are
 *         exhausted and {@code currentSectionId} has reached {@code sections.length}.
 * @throws IllegalStateException if the next section's file list or the next file cannot be
 *         loaded, or if the next tree cannot be turned into an annotation. Failing here avoids
 *         continuing with stale {@code lines} / {@code currentSectionFiles} state.
 */
@Override
public TextAnnotation next() {
    // Nothing loaded yet, or every line of the current file has been consumed.
    if (lines == null || currentLineId == lines.size()) {
        // The current section has no more files.
        if (currentFileId == currentSectionFiles.length) {
            // No more sections either: iteration is finished.
            if (currentSectionId == sections.length) {
                return null;
            }
            try {
                updateCurrentFiles();
            } catch (Exception e) {
                // Previously swallowed; continuing would re-read the old section's files.
                throw new IllegalStateException(
                        "Failed to load the file list for the next section (currentSectionId="
                                + currentSectionId + ")", e);
            }
            currentFileId = 0;
        }

        String path = combinedWSJHome + "/" + sections[currentSectionId - 1] + "/"
                + currentSectionFiles[currentFileId++];
        try {
            lines = LineIO.read(path);
        } catch (FileNotFoundException e) {
            // Previously swallowed; continuing would parse null or already-consumed lines.
            throw new IllegalStateException("Treebank file not found: " + path, e);
        }
        treeInFile = 0;
        currentLineId = 0;
    }

    try {
        return findNextTree();
    } catch (AnnotatorException e) {
        throw new IllegalStateException(e);
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class PennTreebankReader method findNextTree.
/**
 * Reads lines starting at {@code currentLineId} until one bracketed tree is complete (open and
 * close parentheses balance), then builds a single-sentence annotation from it carrying the
 * parse view and a part-of-speech view derived from that parse.
 *
 * @return the annotation built from the next tree in {@code lines}.
 * @throws AnnotatorException propagated from the calls that build the annotation and views.
 */
private TextAnnotation findNextTree() throws AnnotatorException {
    StringBuilder treeText = new StringBuilder();
    int depth = 0;
    boolean awaitingFirstLine = true;

    while (true) {
        String current = lines.get(currentLineId++);
        // Blank lines carry no tree content and are skipped.
        if (!current.isEmpty()) {
            if (awaitingFirstLine) {
                awaitingFirstLine = false;
                // Insert the root label right after the first opening parenthesis.
                int splitAt = current.indexOf("(") + 1;
                current = current.substring(0, splitAt) + TOP_LABEL + current.substring(splitAt);
            }

            // Track the net parenthesis depth contributed by this line.
            for (int i = 0; i < current.length(); i++) {
                char c = current.charAt(i);
                if (c == '(') {
                    depth++;
                } else if (c == ')') {
                    depth--;
                }
            }

            treeText.append(current);
            if (depth == 0) {
                break;
            }
        }
    }

    // Escaped slashes ("\/") in the bracketed text are restored to plain "/".
    String bracketed = treeText.toString().replaceAll("\\\\/", "/");
    Tree<String> tree = TreeParserFactory.getStringTreeParser().parse(bracketed);
    String[] tokens = ParseUtils.getTerminalStringSentence(tree);

    String section = sections[currentSectionId - 1];
    String fileName = currentSectionFiles[currentFileId - 1];
    String id = "wsj/" + section + "/" + fileName + ":" + treeInFile;
    treeInFile++;

    TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
            PENN_TREEBANK_WSJ, id, Collections.singletonList(tokens));

    TreeView goldParse = new TreeView(parseViewName, "PTB-GOLD", ta, 1.0);
    goldParse.setParseTree(0, tree);
    ta.addView(parseViewName, goldParse);

    ta.addView(new POSFromParse(parseViewName));
    return ta;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class NETagPlain method tagData.
/**
 * Reads the input in the given format, annotates it with expressive features, decodes BIO
 * labels, and writes the result under {@code outputPath}.
 * <p>
 * Does this assume that init() has been called already?
 * <p>
 * NOTE(review): only the "-c" and "-json" formats have an output writer here. Any other format,
 * including "-plaintext", is read and tagged and then ends in a
 * {@code NotImplementedException}.
 *
 * @param inputPath the input file or directory; for "-json" it must be a directory of
 *        serialized text annotations.
 * @param outputPath the directory the tagged output is written into.
 * @param dataFormat the format flag; "-plaintext", "-c" and "-json" are handled explicitly.
 * @param params the NER parameters passed to the reader, feature annotator and decoder.
 * @throws IllegalArgumentException if a directory that has to be listed cannot be listed.
 * @throws Exception if reading, tagging or writing fails, or the format has no writer.
 */
public static void tagData(String inputPath, String outputPath, String dataFormat, ParametersForLbjCode params) throws Exception {
    Data data;
    if (!dataFormat.equals("-plaintext")) {
        data = new Data(inputPath, inputPath, dataFormat, new String[] {}, new String[] {}, params);
    } else {
        // plaintext reading: a directory contributes every non-hidden entry, a file just itself.
        File input = new File(inputPath);
        List<String> inFiles = new ArrayList<>();
        if (input.isDirectory()) {
            String[] files = input.list();
            if (files == null) {
                // File.list() returns null on an I/O error; fail clearly instead of with an NPE.
                throw new IllegalArgumentException("Unable to list input directory: " + inputPath);
            }
            for (String file : files) {
                if (!file.startsWith(".")) {
                    inFiles.add(inputPath + File.separator + file);
                }
            }
        } else {
            inFiles.add(inputPath);
        }
        data = new Data();
        for (String inFile : inFiles) {
            logger.debug("Tagging file: " + inFile);
            ArrayList<LinkedVector> sentences = PlainTextReader.parsePlainTextFile(inFile, params);
            NERDocument doc = new NERDocument(sentences, "consoleInput");
            data.documents.add(doc);
        }
    }

    ExpressiveFeaturesAnnotator.annotate(data, params);
    Decoder.annotateDataBIO(data, params);

    if (dataFormat.equals("-c")) {
        // One output file per document: "form label level1-type" per token, blank line per sentence.
        for (int docid = 0; docid < data.documents.size(); docid++) {
            List<String> res = new ArrayList<>();
            ArrayList<LinkedVector> sentences = data.documents.get(docid).sentences;
            for (LinkedVector vector : sentences) {
                for (int j = 0; j < vector.size(); j++) {
                    NEWord w = (NEWord) vector.get(j);
                    res.add(w.form + " " + w.neLabel + " " + w.neTypeLevel1);
                }
                res.add("");
            }
            LineIO.write(outputPath + "/" + docid + ".txt", res);
        }
    } else if (dataFormat.equals("-json")) {
        String[] jsonFiles = new File(inputPath).list();
        if (jsonFiles == null) {
            // Not a directory, or it could not be read.
            throw new IllegalArgumentException("Unable to list input directory: " + inputPath);
        }
        List<TextAnnotation> tas = new ArrayList<>();
        for (String fileName : jsonFiles) {
            // File.list() yields bare names, so they must be resolved against inputPath.
            TextAnnotation ta = SerializationHelper.deserializeTextAnnotationFromFile(
                    inputPath + File.separator + fileName, true);
            tas.add(ta);
        }
        TextAnnotationConverter.Data2TextAnnotation(data, tas);
        for (TextAnnotation ta : tas) {
            SerializationHelper.serializeTextAnnotationToFile(ta, outputPath + "/" + ta.getId(), true);
        }
    } else {
        throw new NotImplementedException("We do not yet support dataFormat of " + dataFormat + " yet.");
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class Main method processInputString.
/**
 * Processes a single input string: unescapes HTML entities, builds a text annotation from the
 * result, renders the NER view of that annotation, and publishes the rendered output through
 * the result processor under a file name made of the current time in milliseconds plus ".txt".
 *
 * @param data the string to process
 * @throws Exception if anything goes wrong.
 */
private void processInputString(String data) throws Exception {
    final String unescaped = StringEscapeUtils.unescapeHtml4(data);
    final TextAnnotation ta = tab.createTextAnnotation(unescaped);
    final String rendered = this.produceOutput(this.nerAnnotator.getView(ta), ta);
    this.getResultProcessor().publish(rendered, System.currentTimeMillis() + ".txt");
}
Aggregations