Use of edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer in project cogcomp-nlp by CogComp.
From the class W2VDatalessAnnotator, method main.
/**
 * Command-line entry point: annotates one test document with the dataless W2V view and prints
 * the predicted label IDs followed by their mapped label names.
 *
 * @param args command-line options. The {@code config} option is the config file path
 *        (default {@code config/project.properties}); the {@code testFile} option is the test
 *        document path (default {@code data/graphicsTestDocument.txt}).
 */
public static void main(String[] args) {
    CommandLine cmd = ESADatalessAnnotator.getCMDOpts(args);
    // Resolved outside the try so the fallback warning below can name the file.
    String configFile = cmd.getOptionValue("config", "config/project.properties");
    ResourceManager rm;
    try {
        ResourceManager nonDefaultRm = new ResourceManager(configFile);
        rm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
    } catch (IOException e) {
        // Falling back to defaults is intentional, but it must not be silent: a mistyped
        // config path would otherwise run with settings the user did not ask for.
        logger.warn("Could not load config file " + configFile + " (" + e.getMessage()
                + ") ... using default config");
        rm = new W2VDatalessConfigurator().getDefaultConfig();
    }
    String testFile = cmd.getOptionValue("testFile", "data/graphicsTestDocument.txt");
    StringBuilder sb = new StringBuilder();
    String line;
    try (BufferedReader br = new BufferedReader(new FileReader(new File(testFile)))) {
        // Join all lines of the file into one space-separated string.
        while ((line = br.readLine()) != null) {
            sb.append(line);
            sb.append(" ");
        }
        String text = sb.toString().trim();
        TokenizerTextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
        TextAnnotation ta = taBuilder.createTextAnnotation(text);
        W2VDatalessAnnotator datalessAnnotator = new W2VDatalessAnnotator(rm);
        datalessAnnotator.addView(ta);
        List<Constituent> annots = ta.getView(ViewNames.DATALESS_W2V).getConstituents();
        System.out.println("Predicted LabelIDs:");
        for (Constituent annot : annots) {
            System.out.println(annot.getLabel());
        }
        // Second pass: translate each label ID through the label-name map from the config.
        Map<String, String> labelNameMap = DatalessAnnotatorUtils.getLabelNameMap(rm.getString(DatalessConfigurator.LabelName_Path.key));
        System.out.println("Predicted Labels:");
        for (Constituent annot : annots) {
            System.out.println(labelNameMap.get(annot.getLabel()));
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        logger.error("Test File not found at " + testFile + " ... exiting");
        System.exit(-1);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        logger.error("Error Annotating the Test Document with the Dataless View ... exiting");
        System.exit(-1);
    } catch (IOException e) {
        e.printStackTrace();
        logger.error("IO Error while reading the test file ... exiting");
        System.exit(-1);
    }
}
Use of edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer in project cogcomp-nlp by CogComp.
From the class MainClass, method annotate.
/**
 * Reads the file at {@code filepath} line by line, builds a {@code TextAnnotation} from each
 * line, runs the preprocessor and the {@code DepAnnotator} on it, and prints the string form of
 * the annotator's view to standard output.
 *
 * <p>If annotating a line raises an {@code AnnotatorException}, its stack trace is printed and
 * processing continues with the next line.
 *
 * @param filepath path of the text file to annotate
 * @throws IOException if the file cannot be opened
 */
private static void annotate(String filepath) throws IOException {
    DepAnnotator annotator = new DepAnnotator();
    TextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(true, false));
    Preprocessor preprocessor = new Preprocessor();
    // Files.lines keeps the underlying file open until the stream is closed, so it must be
    // managed with try-with-resources. The type is fully qualified to avoid a new import.
    try (java.util.stream.Stream<String> lines = Files.lines(Paths.get(filepath))) {
        lines.forEach(line -> {
            TextAnnotation ta = taBuilder.createTextAnnotation(line);
            try {
                preprocessor.annotate(ta);
                annotator.addView(ta);
                System.out.println(ta.getView(annotator.getViewName()).toString());
            } catch (AnnotatorException e) {
                e.printStackTrace();
            }
        });
    }
}
Use of edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer in project cogcomp-nlp by CogComp.
From the class ESADatalessTest, method getTextAnnotation.
/**
 * Builds a {@code TextAnnotation} for the given text using a
 * {@code TokenizerTextAnnotationBuilder} backed by a default-constructed
 * {@code StatefulTokenizer}.
 *
 * @param text the raw text to wrap
 * @return the annotation produced by the builder
 */
private TextAnnotation getTextAnnotation(String text) {
    return new TokenizerTextAnnotationBuilder(new StatefulTokenizer()).createTextAnnotation(text);
}
Use of edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer in project cogcomp-nlp by CogComp.
From the class MultiLingualTokenizer, method getTokenizer.
/**
 * Returns the {@code TextAnnotationBuilder} registered for a language code, creating and
 * caching it in {@code tokenizerMap} on first request.
 *
 * <p>Recognised codes are {@code "en"}, {@code "es"}, {@code "zh"}, {@code "th"} and
 * {@code "ja"}; any other code gets a builder over a {@code WhiteSpaceTokenizer}.
 *
 * @param lang language code selecting the tokenizer
 * @return the cached builder for {@code lang}
 */
public static TextAnnotationBuilder getTokenizer(String lang) {
    // The cache itself is created lazily on the first call.
    if (tokenizerMap == null) {
        tokenizerMap = new HashMap<>();
    }
    if (tokenizerMap.containsKey(lang)) {
        return tokenizerMap.get(lang);
    }

    final TextAnnotationBuilder builder;
    switch (lang) {
        case "en":
            builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
            break;
        case "es":
            builder = new TokenizerTextAnnotationBuilder(new StanfordAnalyzer());
            break;
        case "zh":
            builder = new TokenizerTextAnnotationBuilder(new CharacterTokenizer());
            break;
        case "th":
            builder = new TokenizerTextAnnotationBuilder(new ThaiTokenizer());
            break;
        case "ja":
            builder = new TokenizerTextAnnotationBuilder(new JapaneseTokenizer());
            break;
        default:
            builder = new TokenizerTextAnnotationBuilder(new WhiteSpaceTokenizer());
            break;
    }
    tokenizerMap.put(lang, builder);
    return tokenizerMap.get(lang);
}
Use of edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer in project cogcomp-nlp by CogComp.
From the class ClauseAnnotatorFactory, method buildPipeline.
/**
 * Assembles a {@code BasicAnnotatorService} from a tokenizer-backed text annotation builder,
 * the annotators returned by {@code buildAnnotators()}, and the supplied configuration.
 *
 * @param rm non-default config options passed through to the service
 * @return an annotator service wired with the tokenizer and annotators above
 * @throws IOException declared by this method; propagated from the calls it makes
 * @throws AnnotatorException declared by this method; propagated from the calls it makes
 */
public static BasicAnnotatorService buildPipeline(ResourceManager rm) throws IOException, AnnotatorException {
    StatefulTokenizer tokenizer = new StatefulTokenizer(false, false);
    TextAnnotationBuilder textAnnotationBuilder = new TokenizerTextAnnotationBuilder(tokenizer);
    Map<String, Annotator> annotatorsByName = buildAnnotators();
    BasicAnnotatorService service = new BasicAnnotatorService(textAnnotationBuilder, annotatorsByName, rm);
    return service;
}
Aggregations