use of edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator in project cogcomp-nlp by CogComp.
the class PipelineFactory method buildAnnotators.
/**
 * Instantiates the set of annotators for use in an AnnotatorService object. By default, lazy
 * initialization is used where possible -- change this behavior with the
 * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} property. (In this method the flag is
 * only passed on to the verb and nominal SRL annotators.)
 *
 * <p>The merged pipeline configuration is never reassigned: annotators that need their own
 * configuration (SRL, dataless ESA/W2V) get a block-local ResourceManager, so every
 * {@code USE_*} flag is read from the same pipeline configuration regardless of which
 * annotators were enabled before it.
 *
 * @param nonDefaultRm ResourceManager with all non-default values for Annotators
 * @return a Map from annotator view name to annotator
 * @throws IOException if an SRL annotator cannot be initialized (the original failure is
 *         attached as the cause), or if an annotator constructor itself reports an I/O problem
 */
private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException {
    final ResourceManager rm = new PipelineConfigurator().getConfig(new Stanford331Configurator().getConfig(nonDefaultRm));
    String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key, PipelineConfigurator.TRUE);
    Map<String, Annotator> viewGenerators = new HashMap<>();
    if (rm.getBoolean(PipelineConfigurator.USE_POS)) {
        POSAnnotator pos = new POSAnnotator();
        viewGenerators.put(pos.getViewName(), pos);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) {
        IllinoisLemmatizer lem = new IllinoisLemmatizer(rm);
        viewGenerators.put(lem.getViewName(), lem);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) {
        viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) {
        NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        viewGenerators.put(nerConll.getViewName(), nerConll);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) {
        NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES);
        viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DEP)) {
        DepAnnotator dep = new DepAnnotator();
        viewGenerators.put(dep.getViewName(), dep);
    }
    boolean useStanfordDep = rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP);
    boolean useStanfordParse = rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE);
    if (useStanfordDep || useStanfordParse) {
        // Both Stanford handlers share a single POS tagger and parser instance.
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        // Must be a String: Properties.getProperty() returns null for non-String values, so a
        // Boolean stored via put() would be invisible to any reader that uses getProperty().
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", maxParseSentenceLength);
        // Per sentence? Could be per document, but the Stanford javadoc does not say.
        stanfordProps.setProperty("parse.maxtime", timePerSentence);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        boolean throwExceptionOnSentenceLengthCheck = rm.getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);
        if (useStanfordDep) {
            StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(depParser.getViewName(), depParser);
        }
        if (useStanfordParse) {
            StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(parser.getViewName(), parser);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) {
        Properties verbProps = new Properties();
        verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, SRLType.Verb.name());
        // Local merged config: the SRL type must not leak into the shared pipeline config.
        ResourceManager verbSrlRm = Configurator.mergeProperties(rm, new ResourceManager(verbProps));
        try {
            SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(verbSrlRm, useLazyInitialization);
            viewGenerators.put(ViewNames.SRL_VERB, verbSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new IOException("SRL verb cannot init: " + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) {
        Properties nomProps = new Properties();
        nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, SRLType.Nom.name());
        ResourceManager nomSrlRm = Configurator.mergeProperties(rm, new ResourceManager(nomProps));
        try {
            SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(nomSrlRm, useLazyInitialization);
            // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be
            // initialized yet
            viewGenerators.put(ViewNames.SRL_NOM, nomSrl);
        } catch (Exception e) {
            throw new IOException("SRL nom cannot init .." + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) {
        Quantifier quantifierAnnotator = new Quantifier();
        viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TRANSLITERATION)) {
        // One annotator (and one view, suffixed with the language code) per supported language.
        for (Language lang : TransliterationAnnotator.supportedLanguages) {
            TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator(true, lang);
            viewGenerators.put(ViewNames.TRANSLITERATION + "_" + lang.getCode(), transliterationAnnotator);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
        PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator();
        viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) {
        CommaLabeler commaLabeler = new CommaLabeler();
        viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_VERB_SENSE)) {
        VerbSenseAnnotator verbSense = new VerbSenseAnnotator();
        viewGenerators.put(ViewNames.VERB_SENSE, verbSense);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_MENTION)) {
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        viewGenerators.put(ViewNames.MENTION, mentionAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_RELATION)) {
        viewGenerators.put(ViewNames.RELATION, new RelationAnnotator(true));
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TIMEX3)) {
        Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties();
        TemporalChunkerAnnotator tca = new TemporalChunkerAnnotator(new ResourceManager(rmProps));
        viewGenerators.put(ViewNames.TIMEX3, tca);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_ESA)) {
        // Dataless config is kept local; previously it replaced the pipeline config, so the
        // flag lookups below were made against the wrong ResourceManager.
        ResourceManager esaRm = new ESADatalessConfigurator().getConfig(nonDefaultRm);
        ESADatalessAnnotator esaDataless = new ESADatalessAnnotator(esaRm);
        viewGenerators.put(ViewNames.DATALESS_ESA, esaDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_W2V)) {
        ResourceManager w2vRm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
        W2VDatalessAnnotator w2vDataless = new W2VDatalessAnnotator(w2vRm);
        viewGenerators.put(ViewNames.DATALESS_W2V, w2vDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUESTION_TYPER)) {
        QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator();
        viewGenerators.put(ViewNames.QUESTION_TYPE, questionTyper);
    }
    return viewGenerators;
}
use of edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator in project cogcomp-nlp by CogComp.
the class W2VDatalessTest method testPredictions.
/**
 * Builds a W2V dataless annotator from {@code config/project.properties}, runs it over two
 * fixed sample documents and checks that the predicted label set of each document equals its
 * expected (gold) label set. Gold and predicted labels are also echoed to standard output.
 */
@Test
public void testPredictions() {
    try {
        configFile = "config/project.properties";
        ResourceManager userRm = new ResourceManager(configFile);
        ResourceManager w2vRm = new W2VDatalessConfigurator().getConfig(userRm);
        dataless = new W2VDatalessAnnotator(w2vRm);

        // Sample documents.
        documents = new ArrayList<>();
        String screenCaptureDoc = "i m looking for some recommendations for screen capture programs a couple" + " of issues ago pc mag listed as editor s choices both conversion artist" + " and hijaak for windows anyone have any experience with those or some others" + " i m trying to get an alpha manual in the next few days and i m not making much" + " progress with the screen shots i m currently using dodot and i m about to burn it" + " and the disks it rode it on it s got a lot of freaky bugs and oversights that are " + "driving me crazy tonight it decided that for any graphic it writes out as a tiff " + "file that s under a certain arbitrary size it will swap the left and right sides of" + " the picture usually it confines itself to not copying things to the clipboard so i " + "have to save and load pix for editing in paintbrush or crashing every hour or so the " + "one nice thing it has though is it s dither option you d think that this would turn " + "colors into dots which it does if you go from say colors to colors but if you go " + "from or colors to b w you can set a threshold level for which colors turn to black " + "and which turn to white for me this is useful because i can turn light grays on buttons" + " to white and the dark grays to black and thereby preserve the d effect on buttons and " + "other parts of the window if you understood my description can you tell me if another " + "less buggy program can do this as well much thanks for any help signature david delgreco " + "what lies behind us and what lies technically a writer before us are tiny matters compared " + "delgreco rahul net to what lies within us oliver wendell holmes david f delgreco delgreco rahul " + "net recommendation for screen capture program";
        documents.add(screenCaptureDoc);
        String xmasLightsDoc = "yes i know it s nowhere near christmas time but i m gonna loose net access in a few days maybe " + "a week or if i m lucky and wanted to post this for interested people to save till xmas " + "note bell labs is a good place if you have a phd and a good boss i have neither subject " + "xmas light set with levels of brightness another version of a variable brightness xmas " + "light set this set starts with a blinker bulb string diagram orginal way set 0v b b " + "0rtn modified set for level brightness string 0v 0k w string b 0v rtn note no mods to " + "wiring to the right of this point only one blinker is used note that the blinker " + "would not have as much current thru it as the string bulbs because of the second " + "string of bulbs in parallel with it that s why the use of the 0k w resistor here to " + "add extra current thru the blinker to make up for the current shunted thru the second " + "string while the blinker is glowing and the second string is not glowing when the " + "blinker goes open this resistor has only a slight effect on the brightness of the " + "strings s slightly dimmer s slightly brighter or use a w 0v bulb in place of the 0k " + "resistor if you can get one caution do not replace with a standard c bulb as these " + "draw too much current and burn out the blinker c approx w what you ll see when it s " + "working powerup string will light at full brightness and b will be lit bypassing most " + "of the current from the second string making them not light b will open placing both " + "strings in series making the string that was out to glow at a low brightness and the " + "other string that was on before to glow at reduced brightness be sure to wire and insulate" + " the splices resistor leads and cut wires in a safe manner level brightness xmas light " + "set for easter";
        documents.add(xmasLightsDoc);

        // Expected label IDs, in the same order as the documents above.
        docLabels = new ArrayList<>();
        Set<String> expectedForFirst = new HashSet<>(Arrays.asList("computer", "comp.os.ms.windows.misc"));
        docLabels.add(expectedForFirst);
        Set<String> expectedForSecond = new HashSet<>(Arrays.asList("computer", "comp.windows.x"));
        docLabels.add(expectedForSecond);
    } catch (IOException e) {
        e.printStackTrace();
        String setupFailure = "IO Error while initializing the annotator .. " + e.getMessage();
        System.out.println(setupFailure);
        fail(setupFailure);
    }
    try {
        for (int docIndex = 0; docIndex < documents.size(); docIndex++) {
            String text = documents.get(docIndex);
            Set<String> predicted = getPredictions(getTextAnnotation(text), dataless);
            String docTag = "Doc" + docIndex;

            System.out.println(docTag + ": Gold LabelIDs:");
            for (String gold : docLabels.get(docIndex)) {
                System.out.println(gold);
            }

            System.out.println(docTag + ": Predicted LabelIDs:");
            for (String label : predicted) {
                System.out.println(label);
            }
            System.out.println();

            assertTrue(checkSetEquality(docLabels.get(docIndex), predicted));
        }
    } catch (AnnotatorException e) {
        e.printStackTrace();
        String annotationFailure = "Error annotating the document .. " + e.getMessage();
        System.out.println(annotationFailure);
        fail(annotationFailure);
    }
}
use of edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator in project cogcomp-nlp by CogComp.
the class W2VDatalessAnnotator method main.
/**
 * Command-line demo: annotates one test document with the W2V dataless view and prints the
 * predicted label IDs followed by the corresponding label names.
 *
 * <p>If the config file cannot be read, a warning is logged and the default configuration is
 * used instead. A missing or unreadable test file, or an annotation failure, is logged and the
 * JVM exits with status -1.
 *
 * @param args command-line options as parsed by {@code ESADatalessAnnotator.getCMDOpts}:
 *        {@code config} is the config file path (default {@code config/project.properties})
 *        and {@code testFile} is the document to annotate (default
 *        {@code data/graphicsTestDocument.txt})
 */
public static void main(String[] args) {
    CommandLine cmd = ESADatalessAnnotator.getCMDOpts(args);
    String configFile = cmd.getOptionValue("config", "config/project.properties");
    ResourceManager rm;
    try {
        ResourceManager nonDefaultRm = new ResourceManager(configFile);
        rm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
    } catch (IOException e) {
        // Falling back to defaults is deliberate, but do it visibly: silently ignoring a bad
        // config path makes it look as if the user's settings were applied.
        logger.warn("Could not load config file " + configFile + " (" + e.getMessage()
                + ") ... falling back to the default configuration");
        rm = new W2VDatalessConfigurator().getDefaultConfig();
    }
    String testFile = cmd.getOptionValue("testFile", "data/graphicsTestDocument.txt");
    StringBuilder sb = new StringBuilder();
    String line;
    try (BufferedReader br = new BufferedReader(new FileReader(new File(testFile)))) {
        // Join all lines of the file into a single space-separated text.
        while ((line = br.readLine()) != null) {
            sb.append(line);
            sb.append(" ");
        }
        String text = sb.toString().trim();
        TokenizerTextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
        TextAnnotation ta = taBuilder.createTextAnnotation(text);
        W2VDatalessAnnotator datalessAnnotator = new W2VDatalessAnnotator(rm);
        datalessAnnotator.addView(ta);
        List<Constituent> annots = ta.getView(ViewNames.DATALESS_W2V).getConstituents();
        System.out.println("Predicted LabelIDs:");
        for (Constituent annot : annots) {
            System.out.println(annot.getLabel());
        }
        // Second pass prints the names looked up for the same label IDs.
        Map<String, String> labelNameMap = DatalessAnnotatorUtils.getLabelNameMap(rm.getString(DatalessConfigurator.LabelName_Path.key));
        System.out.println("Predicted Labels:");
        for (Constituent annot : annots) {
            System.out.println(labelNameMap.get(annot.getLabel()));
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        logger.error("Test File not found at " + testFile + " ... exiting");
        System.exit(-1);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        logger.error("Error Annotating the Test Document with the Dataless View ... exiting");
        System.exit(-1);
    } catch (IOException e) {
        e.printStackTrace();
        logger.error("IO Error while reading the test file ... exiting");
        System.exit(-1);
    }
}
Aggregations