use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.
the class BrownClusters method get.
/**
 * Initialize the brown cluster data. Clusters are stored in a static data structure to avoid
 * reloading the same (read-only) clusters over and over.
 *
 * <p>If every entry of {@code pathsToClusterFiles} exists on the local file system the files are
 * read from there; otherwise all of them are resolved relative to the brown-clusters directory
 * fetched from the datastore (Minio).
 *
 * <p>Each line of a cluster file is expected to hold three whitespace-separated tokens:
 * the cluster path, the word, and the occurrence count.
 *
 * @param pathsToClusterFiles the files containing the data.
 * @param thresholds per-file minimum occurrence count; a word is kept only when its count is
 *        greater than or equal to the threshold at the same index.
 * @param isLowercaseBrownClusters per-file flag, stored alongside the loaded resource at the same index.
 * @return the cached clusters for this set of files, loading them first if necessary.
 * @throws RuntimeException if the cluster files cannot be fetched or opened.
 */
public static BrownClusters get(Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) {
    synchronized (INIT_SYNC) {
        // first check for a cluster already loaded for this data.
        String key = getKey(pathsToClusterFiles);
        if (!clusters.containsKey(key)) {
            // check to see if all the paths exist on the local file system.
            boolean useLocalBrownCluster = true;
            for (String path : pathsToClusterFiles) {
                if (!new File(path).exists()) {
                    useLocalBrownCluster = false;
                    break;
                }
            }

            // create the cluster data structure.
            BrownClusters brownclusters = new BrownClusters();
            brownclusters.isLowercaseBrownClustersByResource = new boolean[isLowercaseBrownClusters.size()];
            brownclusters.wordToPathByResource = new ArrayList<>();
            brownclusters.resources = new ArrayList<>();

            if (!useLocalBrownCluster) {
                // load everything from Minio
                try {
                    Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
                    File bcDirectory = dsNoCredentials.getDirectory("org.cogcomp.brown-clusters", "brown-clusters", 1.5, false);
                    populateFromFiles(brownclusters, bcDirectory.getPath() + File.separator,
                            pathsToClusterFiles, thresholds, isLowercaseBrownClusters);
                } catch (InvalidPortException | InvalidEndpointException | DatastoreException | FileNotFoundException e) {
                    throw new RuntimeException("Brown Clusters could not be loaded.", e);
                }
                logger.info("Loaded brown cluster from " + key + " from Minio system.");
            } else {
                // load the clusters from the local file system.
                try {
                    populateFromFiles(brownclusters, "", pathsToClusterFiles, thresholds, isLowercaseBrownClusters);
                } catch (FileNotFoundException e) {
                    throw new RuntimeException("Brown Clusters files existed on local disk, but could not be loaded.", e);
                }
                logger.info("Loaded brown cluster from " + key + " from the local file system.");
            }
            // only publish the clusters once every file has been read successfully.
            clusters.put(key, brownclusters);
        }
        // read the cache while still holding the lock that guards writes to it.
        return clusters.get(key);
    }
}

/**
 * Reads every cluster file (each path prefixed with {@code pathPrefix}) and appends the
 * resulting word-to-path map, lowercase flag and resource name to {@code target}.
 */
private static void populateFromFiles(BrownClusters target, String pathPrefix, Vector<String> pathsToClusterFiles,
        Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) throws FileNotFoundException {
    for (int i = 0; i < pathsToClusterFiles.size(); i++) {
        String resource = pathsToClusterFiles.elementAt(i);
        target.wordToPathByResource.add(readClusterFile(pathPrefix + resource, thresholds.elementAt(i)));
        target.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
        target.resources.add(resource);
    }
}

/**
 * Parses one cluster file into a word-to-path map, keeping only words whose occurrence
 * count is at least {@code threshold}.
 */
private static THashMap<String, String> readClusterFile(String bcFilePath, int threshold) throws FileNotFoundException {
    THashMap<String, String> wordToPath = new THashMap<>();
    InputStream is = new FileInputStream(bcFilePath);
    InFile in = new InFile(is);
    try {
        for (String line = in.readLine(); line != null; line = in.readLine()) {
            StringTokenizer st = new StringTokenizer(line);
            String path = st.nextToken();
            String word = st.nextToken();
            int occ = Integer.parseInt(st.nextToken());
            if (occ >= threshold) {
                wordToPath.put(word, path);
            }
        }
    } finally {
        // release the reader even when a malformed line aborts parsing.
        in.close();
    }
    return wordToPath;
}
use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.
the class ModelLoader method load.
/**
 * Load the models wherever they are found. Check file system first, then classpath, and finally get it
 * from Minio datastore. When training and no model is found locally or on the classpath, empty
 * taggers bound to the configured model path are created instead of contacting the datastore.
 * The resulting taggers are stored in {@code cp.taggerLevel1} and {@code cp.taggerLevel2}.
 *
 * @param rm the resource manager.
 * @param viewName the name of the view identifies the model.
 * @param training if we are training.
 * @param cp the parameters for the calling model.
 * @throws IllegalArgumentException if no model is found locally and the view name does not map to a
 *         default datastore model.
 */
public static void load(ResourceManager rm, String viewName, boolean training, ParametersForLbjCode cp) {
    // the loader built into the model will check the local file system and the jar files in the classpath.
    String modelPath = cp.pathToModelFile;
    String modelFilePath = modelPath + ".level1";
    NETaggerLevel1 tagger1 = null;
    NETaggerLevel2 tagger2 = null;

    // Decide where the model at modelPath comes from; null means "fall back to the datastore".
    String source = null;
    if (new File(modelFilePath).exists()) {
        source = "file";
    } else if (IOUtilities.existsInClasspath(NETaggerLevel1.class, modelFilePath)) {
        source = "classpath";
    } else if (training) {
        // we are training a new model, so if it doesn't exist, we don't care, just create a
        // container.
        source = "file";
    }

    if (source != null) {
        tagger1 = new NETaggerLevel1(modelPath + ".level1", modelPath + ".level1.lex");
        logger.info("Reading L1 model from " + source + " : " + modelPath + ".level1");
        if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
            tagger2 = new NETaggerLevel2(modelPath + ".level2", modelPath + ".level2.lex");
            logger.info("Reading L2 model from " + source + " : " + modelPath + ".level2");
        } else {
            logger.info("L2 model not required.");
        }
    } else {
        // all else has failed, load from the datastore, create artifact ids based on the view
        // name and training data designation.
        String dataset;
        String modelFileName;
        String lowercaseViewName = viewName.toLowerCase();
        if (lowercaseViewName.contains(ViewNames.NER_CONLL.toLowerCase())) {
            dataset = "enron-conll";
            modelFileName = "EnronCoNLL.model";
        } else if (lowercaseViewName.contains(ViewNames.NER_ONTONOTES.toLowerCase())) {
            dataset = "ontonotes";
            modelFileName = "OntoNotes.model";
        } else {
            // not a standard model, and we can't find it on the command line.
            throw new IllegalArgumentException("The NER models could not be found at \"" + modelPath + "\", and no default with view name " + viewName);
        }
        String dataSplit;
        if (!rm.containsKey(NerBaseConfigurator.TRAINED_ON)) {
            dataSplit = NerBaseConfigurator.TRAINED_ON_ALL_DATA;
        } else {
            dataSplit = rm.getString(NerBaseConfigurator.TRAINED_ON);
        }
        try {
            Datastore ds = new Datastore(new ResourceConfigurator().getConfig(rm));
            String artifactId = "ner-model-" + dataset + "-" + dataSplit;
            File modelDir = ds.getDirectory("edu.illinois.cs.cogcomp.ner", artifactId, 4.0, false);
            // The model file is selected from the dataset chosen above rather than by
            // substring-matching the download path, which could match an unrelated
            // directory component.
            String model = modelDir.getPath() + "/model/" + modelFileName;
            tagger1 = new NETaggerLevel1(model + ".level1", model + ".level1.lex");
            if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
                tagger2 = new NETaggerLevel2(model + ".level2", model + ".level2.lex");
            }
        } catch (InvalidPortException | DatastoreException | InvalidEndpointException e) {
            // Kept non-fatal as before (taggers stay null), but reported through the logger.
            logger.error("Could not load the NER models for view " + viewName + " from the datastore.", e);
        }
    }
    cp.taggerLevel1 = tagger1;
    cp.taggerLevel2 = tagger2;
}
use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.
the class ExtentReader method getTextAnnotations.
/**
 * Reads the configured corpus and returns its documents as text annotations.
 *
 * <p>For the "ACE" and "ERE" corpora every document read from {@code _path} gets a view added by a
 * {@code POSAnnotator}; any exception raised while reading is printed and the documents
 * collected so far are returned. For a corpus name starting with "COMBINED" the name is split on
 * "-" and its second, third and fourth fields are used as corpus, mode and fold for a
 * {@code BIOCombinedReader}.
 *
 * @return the annotations read; empty if the corpus name matches none of the cases.
 */
public List<TextAnnotation> getTextAnnotations() throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    List<TextAnnotation> annotations = new ArrayList<>();

    if (_corpus.equals("ACE")) {
        POSAnnotator tagger = new POSAnnotator();
        try {
            ACEReaderWithTrueCaseFixer reader = new ACEReaderWithTrueCaseFixer(_path, false);
            for (TextAnnotation document : reader) {
                document.addView(tagger);
                annotations.add(document);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    if (_corpus.equals("ERE")) {
        POSAnnotator tagger = new POSAnnotator();
        try {
            EREMentionRelationReader reader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, _path, false);
            for (XmlTextAnnotation xmlDocument : reader) {
                TextAnnotation document = xmlDocument.getTextAnnotation();
                document.addView(tagger);
                annotations.add(document);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    if (_corpus.startsWith("COMBINED")) {
        // Corpus name layout: COMBINED-<corpus>-<mode>-<fold>
        String[] fields = _corpus.split("-");
        String realCorpus = fields[1];
        String mode = fields[2];
        int fold = Integer.parseInt(fields[3]);
        BIOCombinedReader reader = new BIOCombinedReader(fold, realCorpus + "-" + mode, "ALL", true);
        // The reader signals exhaustion by returning null.
        Object next = reader.next();
        while (next != null) {
            annotations.add((TextAnnotation) next);
            next = reader.next();
        }
    }

    return annotations;
}
use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.
the class SenseManager method getLegalSensesMap.
/**
 * Builds the map from each predicate to its set of legal senses, read from the
 * {@code sense-list.txt} file fetched from the datastore.
 *
 * <p>Each line of the file is expected to be {@code predicate<TAB>sense1,sense2,...}.
 *
 * @return the predicate-to-senses map.
 * @throws IllegalStateException if the sense list cannot be fetched from the datastore.
 */
private Map<String, Set<String>> getLegalSensesMap() {
    File senseFile;
    try {
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        senseFile = ds.getFile("edu.illinois.cs.cogcomp.verbsense", "sense-list.txt", 1.0, false);
    } catch (InvalidPortException | InvalidEndpointException | DatastoreException e) {
        // Fail with the real cause instead of continuing with a null file, which would
        // surface as an unexplained NullPointerException on the next statement.
        throw new IllegalStateException("Unable to fetch the list of legal senses from the datastore", e);
    }

    Map<String, Set<String>> map = new HashMap<>();
    try {
        for (String line : LineIO.read(senseFile.getAbsolutePath())) {
            String[] fields = line.split("\t");
            String predicate = fields[0];
            String[] senseArray = fields[1].split(",");
            Set<String> senseSet = new HashSet<>(Arrays.asList(senseArray));
            map.put(predicate, senseSet);
        }
    } catch (FileNotFoundException e) {
        log.error("Unable to load list of legal senses: ", e);
        System.exit(-1);
    }
    return map;
}
use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.
the class QuestionTypeAnnotator method initialize.
/**
 * Fetches the question-typer model directory from the datastore and constructs the fine and
 * coarse question typers from the model and lexicon files inside it.
 *
 * @param rm the resource manager (not read by this method).
 */
@Override
public void initialize(ResourceManager rm) {
    System.out.println("loading . . . ");
    try {
        Datastore datastore = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File downloadedDir = datastore.getDirectory("org.cogcomp.question-typer", "question-typer-models", 1.0, false);
        this.modelsFolder = downloadedDir.getPath() + "/question-typer-models/";
        System.out.println(modelsFolder + "QuestionFineTyper.lc");
    } catch (InvalidPortException | DatastoreException | InvalidEndpointException e) {
        // NOTE(review): on failure modelsFolder keeps whatever value it had before this call,
        // and the typers below are still constructed from it - confirm that is intended.
        e.printStackTrace();
    }

    String fineBase = modelsFolder + "QuestionFineTyper";
    String coarseBase = modelsFolder + "QuestionCoarseTyper";
    fine = new QuestionFineTyper(fineBase + ".lc", fineBase + ".lex");
    coarse = new QuestionCoarseTyper(coarseBase + ".lc", coarseBase + ".lex");
}
Aggregations