use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.
the class ExtentReader method getTextAnnotations.
public List<TextAnnotation> getTextAnnotations()
        throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    List<TextAnnotation> ret = new ArrayList<>();
    if (_corpus.equals("ACE")) {
        // ACE corpus: read documents (with true-case fixing) and attach a POS view to each.
        POSAnnotator posAnnotator = new POSAnnotator();
        try {
            ACEReaderWithTrueCaseFixer aceReader = new ACEReaderWithTrueCaseFixer(_path, false);
            for (TextAnnotation ta : aceReader) {
                ta.addView(posAnnotator);
                ret.add(ta);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    if (_corpus.equals("ERE")) {
        // ERE corpus: each XmlTextAnnotation wraps the TextAnnotation we actually collect.
        POSAnnotator posAnnotator = new POSAnnotator();
        try {
            EREMentionRelationReader ereMentionRelationReader =
                    new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, _path, false);
            for (XmlTextAnnotation xta : ereMentionRelationReader) {
                TextAnnotation ta = xta.getTextAnnotation();
                ta.addView(posAnnotator);
                ret.add(ta);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    if (_corpus.startsWith("COMBINED")) {
        // Expected corpus-name format: COMBINED-<corpus>-<mode>-<fold>.
        String[] parts = _corpus.split("-");
        String realCorpus = parts[1];
        String mode = parts[2];
        int fold = Integer.parseInt(parts[3]);
        BIOCombinedReader bioCombinedReader = new BIOCombinedReader(fold, realCorpus + "-" + mode, "ALL", true);
        for (Object ta = bioCombinedReader.next(); ta != null; ta = bioCombinedReader.next()) {
            ret.add((TextAnnotation) ta);
        }
    }
    return ret;
}
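A minimal usage sketch. The ExtentReader constructor arguments are assumptions (only the _corpus and _path fields are visible in this excerpt); the view check uses the standard cogcomp-core ViewNames constants:

ExtentReader reader = new ExtentReader("/path/to/corpus", "ACE"); // hypothetical constructor
List<TextAnnotation> annotations = reader.getTextAnnotations();
for (TextAnnotation ta : annotations) {
    System.out.println(ta.getId() + " -> has POS view: " + ta.hasView(ViewNames.POS));
}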
use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.
the class SenseManager method getLegalSensesMap.
private Map<String, Set<String>> getLegalSensesMap() {
    Map<String, Set<String>> map = new HashMap<>();
    File senseFile = null;
    try {
        // Fetch the sense list from the remote Datastore (cached locally after the first download).
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        senseFile = ds.getFile("edu.illinois.cs.cogcomp.verbsense", "sense-list.txt", 1.0, false);
    } catch (InvalidPortException | InvalidEndpointException | DatastoreException e) {
        e.printStackTrace();
    }
    try {
        // Each line has the form: <predicate>\t<sense1>,<sense2>,...
        for (String line : LineIO.read(senseFile.getAbsolutePath())) {
            String[] parts = line.split("\t");
            String predicate = parts[0];
            String[] senseArray = parts[1].split(",");
            map.put(predicate, new HashSet<>(Arrays.asList(senseArray)));
        }
    } catch (FileNotFoundException e) {
        log.error("Unable to load list of legal senses: ", e);
        System.exit(-1);
    }
    return map;
}
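A hypothetical caller, just to show the shape of the returned map (predicate string mapped to its set of legal sense labels; "run" is an illustrative predicate):

Map<String, Set<String>> legalSenses = getLegalSensesMap();
Set<String> senses = legalSenses.getOrDefault("run", Collections.emptySet());
System.out.println("Legal senses for 'run': " + senses);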
use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.
the class MemoryBasedW2V method loadVectors.
/**
 * Lazily loads the Word2Vec embeddings; this is a no-op once {@code vectors} is populated.
 */
private void loadVectors() {
    if (vectors == null) {
        File inputFile;
        try {
            inputFile = getFile();
        } catch (DatastoreException e) {
            e.printStackTrace();
            logger.error("Error retrieving the embedding file from DataStore");
            throw new RuntimeException("Error retrieving the embedding file from DataStore");
        }
        try (BufferedReader bf = new BufferedReader(new FileReader(inputFile))) {
            logger.info("Reading Word2Vec embeddings from " + inputFile.getAbsolutePath());
            vectors = new HashMap<>();
            // The first line has the schema: #Terms #Vector_Dimensions
            String line = bf.readLine();
            String[] tokens = line.split(" ");
            int dimNum = Integer.parseInt(tokens[1].trim());
            if (dimNum != dimensions) {
                // No explicit close() needed: try-with-resources closes the reader on throw.
                throw new IllegalStateException("Number of dimensions in the embeddings file (" + dimNum + ") doesn't match the one in the config file (" + dimensions + ")");
            }
            int count = 0;
            while ((line = bf.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0)
                    continue;
                // Split into the word and the remainder of the line holding the vector components.
                tokens = line.split(" ", 2);
                String[] stringVec = tokens[1].split(" ");
                if (stringVec.length != dimNum) {
                    throw new IllegalStateException("Possible error in the embeddings file -- number of dimensions (" + dimNum + ") doesn't match --> " + tokens[1]);
                }
                String word = tokens[0].trim();
                if (word.length() == 0)
                    continue;
                double[] scores = new double[dimNum];
                int i = 0;
                for (String dim : stringVec) {
                    scores[i] = Double.parseDouble(dim);
                    i++;
                }
                vectors.put(word, new DenseVector(scores));
                count++;
                if (count % 100000 == 0)
                    logger.info("#W2V embeddings read: " + count);
            }
        } catch (IOException e) {
            e.printStackTrace();
            logger.error("IO error while reading the W2V embedding file");
            throw new RuntimeException("IO error while reading the W2V embedding file");
        } catch (IllegalStateException e) {
            e.printStackTrace();
            logger.error(e.getMessage());
            throw new RuntimeException(e.getMessage());
        }
    }
}
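For reference, the parsing above implies the standard textual word2vec layout; the values below are illustrative, and the lookup assumes it runs inside the class after loading:

// File layout implied by the parser (illustrative values):
//   3000000 300
//   the 0.0465 0.2131 -0.0074 ...
loadVectors();
DenseVector vec = vectors.get("the"); // null for out-of-vocabulary terms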
use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.
the class QuestionTypeAnnotator method initialize.
@Override
public void initialize(ResourceManager rm) {
    System.out.println("loading . . . ");
    try {
        // Fetch the pre-trained question-typer models from the remote Datastore.
        Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File f = dsNoCredentials.getDirectory("org.cogcomp.question-typer", "question-typer-models", 1.0, false);
        this.modelsFolder = f.getPath() + "/question-typer-models/";
        System.out.println(modelsFolder + "QuestionFineTyper.lc");
    } catch (InvalidPortException | DatastoreException | InvalidEndpointException e) {
        e.printStackTrace();
    }
    // Build the fine- and coarse-grained classifiers from the downloaded model (.lc) and lexicon (.lex) files.
    // Note: if the Datastore fetch above failed, modelsFolder is still null and these constructors will fail.
    fine = new QuestionFineTyper(modelsFolder + "QuestionFineTyper.lc", modelsFolder + "QuestionFineTyper.lex");
    coarse = new QuestionCoarseTyper(modelsFolder + "QuestionCoarseTyper.lc", modelsFolder + "QuestionCoarseTyper.lex");
}
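Since QuestionTypeAnnotator is an Annotator, applying it mirrors the POSAnnotator usage earlier on this page; the constructor and ResourceManager setup below are assumptions, not shown in the excerpt:

QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator(); // hypothetical no-arg constructor
questionTyper.initialize(new ResourceManager(new Properties())); // assumed empty configuration
// Once initialized, the annotator can add its view: ta.addView(questionTyper);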
use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.
the class BrownClusters method get.
/**
 * Initialize the Brown cluster data. Clusters are stored in a static data structure so the same
 * (read-only) clusters are not reloaded over and over.
 * @param pathsToClusterFiles the files containing the cluster data.
 * @param thresholds per-resource minimum occurrence count a word must reach to be kept.
 * @param isLowercaseBrownClusters per-resource flag indicating whether the clusters are lowercased.
 */
public static BrownClusters get(Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) {
    boolean useLocalBrownCluster = true;
    String key = null;
    synchronized (INIT_SYNC) {
        // First check for clusters already loaded for this data.
        key = getKey(pathsToClusterFiles);
        if (!clusters.containsKey(key)) {
            // Check whether all the paths exist on the local file system.
            for (String path : pathsToClusterFiles) {
                if (!new File(path).exists()) {
                    useLocalBrownCluster = false;
                    break;
                }
            }
            // Create the cluster data structure.
            BrownClusters brownclusters = new BrownClusters();
            brownclusters.isLowercaseBrownClustersByResource = new boolean[isLowercaseBrownClusters.size()];
            brownclusters.wordToPathByResource = new ArrayList<>();
            brownclusters.resources = new ArrayList<>();
            if (!useLocalBrownCluster) {
                // Load everything from Minio.
                try {
                    Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
                    File bcDirectory = dsNoCredentials.getDirectory("org.cogcomp.brown-clusters", "brown-clusters", 1.5, false);
                    for (int i = 0; i < pathsToClusterFiles.size(); i++) {
                        THashMap<String, String> h = new THashMap<>();
                        // Resolve the cluster file inside the downloaded datastore directory.
                        String bcFilePath = bcDirectory.getPath() + File.separator + pathsToClusterFiles.elementAt(i);
                        InputStream is = new FileInputStream(bcFilePath);
                        InFile in = new InFile(is);
                        // Each line is: <bit-string path> <word> <occurrence count>.
                        String line = in.readLine();
                        while (line != null) {
                            StringTokenizer st = new StringTokenizer(line);
                            String path = st.nextToken();
                            String word = st.nextToken();
                            int occ = Integer.parseInt(st.nextToken());
                            if (occ >= thresholds.elementAt(i)) {
                                h.put(word, path);
                            }
                            line = in.readLine();
                        }
                        brownclusters.wordToPathByResource.add(h);
                        brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
                        brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
                        in.close();
                    }
                    logger.info("Loaded brown cluster " + key + " from the Minio system.");
                    clusters.put(key, brownclusters);
                } catch (InvalidPortException | InvalidEndpointException | DatastoreException | FileNotFoundException e) {
                    throw new RuntimeException("Brown Clusters could not be loaded.", e);
                }
            } else {
                // Load the clusters from the local file system; same parsing as above, but the
                // paths are used as given instead of being resolved against the datastore directory.
                try {
                    for (int i = 0; i < pathsToClusterFiles.size(); i++) {
                        THashMap<String, String> h = new THashMap<>();
                        String bcFilePath = pathsToClusterFiles.elementAt(i);
                        InputStream is = new FileInputStream(bcFilePath);
                        InFile in = new InFile(is);
                        String line = in.readLine();
                        while (line != null) {
                            StringTokenizer st = new StringTokenizer(line);
                            String path = st.nextToken();
                            String word = st.nextToken();
                            int occ = Integer.parseInt(st.nextToken());
                            if (occ >= thresholds.elementAt(i)) {
                                h.put(word, path);
                            }
                            line = in.readLine();
                        }
                        brownclusters.wordToPathByResource.add(h);
                        brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
                        brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
                        in.close();
                    }
                    logger.info("Loaded brown cluster " + key + " from the local file system.");
                    clusters.put(key, brownclusters);
                } catch (FileNotFoundException e) {
                    throw new RuntimeException("Brown Clusters files existed on local disk, but could not be loaded.", e);
                }
            }
        }
    }
    return clusters.get(key);
}
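An invocation sketch; the cluster file name and threshold are illustrative (actual resource names live in the org.cogcomp.brown-clusters datastore directory, or on local disk):

Vector<String> paths = new Vector<>();
paths.add("brownBllipClusters"); // illustrative cluster-file name
Vector<Integer> thresholds = new Vector<>();
thresholds.add(5); // keep words with at least 5 occurrences
Vector<Boolean> lowercase = new Vector<>();
lowercase.add(false);
BrownClusters bc = BrownClusters.get(paths, thresholds, lowercase);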