Search in sources :

Example 1 with InvalidEndpointException

use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.

the class BrownClusters method get.

/**
 * Initialize the brown cluster data. Clusters are stored in a static data structure to avoid reloading the same
 * (read-only) clusters over and over.
 * <p>
 * If every given path exists on the local file system the clusters are read from there; otherwise all of them are
 * read from the brown-clusters directory obtained from the Minio datastore, with each given path resolved relative
 * to that directory.
 *
 * @param pathsToClusterFiles the files containing the data; also used to build the cache key.
 * @param thresholds for each file (by index), the minimum occurrence count a word needs in order to be kept.
 * @param isLowercaseBrownClusters for each file (by index), the flag recorded alongside the loaded resource.
 * @return the cached or newly loaded clusters for the given paths.
 * @throws RuntimeException if the cluster files can not be obtained or opened.
 */
public static BrownClusters get(Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) {
    String key;
    synchronized (INIT_SYNC) {
        // first check for a cluster already loaded for this data.
        key = getKey(pathsToClusterFiles);
        if (!clusters.containsKey(key)) {
            // check to see if all the paths exist on the local file system.
            boolean useLocalBrownCluster = true;
            for (String path : pathsToClusterFiles) {
                if (!new File(path).exists()) {
                    useLocalBrownCluster = false;
                    break;
                }
            }
            // create the cluster data structure.
            BrownClusters brownclusters = new BrownClusters();
            brownclusters.isLowercaseBrownClustersByResource = new boolean[isLowercaseBrownClusters.size()];
            brownclusters.wordToPathByResource = new ArrayList<>();
            brownclusters.resources = new ArrayList<>();
            if (!useLocalBrownCluster) {
                // load everything from Minio
                try {
                    Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
                    File bcDirectory = dsNoCredentials.getDirectory("org.cogcomp.brown-clusters", "brown-clusters", 1.5, false);
                    for (int i = 0; i < pathsToClusterFiles.size(); i++) {
                        // the requested path is resolved relative to the downloaded directory.
                        String bcFilePath = bcDirectory.getPath() + File.separator + pathsToClusterFiles.elementAt(i);
                        brownclusters.wordToPathByResource.add(readWordToPathMap(bcFilePath, thresholds.elementAt(i)));
                        brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
                        brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
                    }
                    logger.info("Loaded brown cluster from " + key + " from Minio system.");
                    clusters.put(key, brownclusters);
                } catch (InvalidPortException | InvalidEndpointException | DatastoreException | FileNotFoundException e) {
                    throw new RuntimeException("Brown Clusters could not be loaded.", e);
                }
            } else {
                // load the clusters from the local file system.
                try {
                    for (int i = 0; i < pathsToClusterFiles.size(); i++) {
                        String bcFilePath = pathsToClusterFiles.elementAt(i);
                        brownclusters.wordToPathByResource.add(readWordToPathMap(bcFilePath, thresholds.elementAt(i)));
                        brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
                        brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
                    }
                    logger.info("Loaded brown cluster from " + key + " from the local file system.");
                    clusters.put(key, brownclusters);
                } catch (FileNotFoundException e) {
                    throw new RuntimeException("Brown Clusters files existed on local disk, but could not be loaded.", e);
                }
            }
        }
    }
    return clusters.get(key);
}

/**
 * Read one cluster file into a word-to-path map. Each line is expected to hold three whitespace separated
 * tokens: the cluster path, the word, and the occurrence count of the word.
 *
 * @param bcFilePath the file to read.
 * @param threshold words occurring fewer times than this are dropped.
 * @return the map from word to cluster path.
 * @throws FileNotFoundException if the file can not be opened.
 */
private static THashMap<String, String> readWordToPathMap(String bcFilePath, int threshold) throws FileNotFoundException {
    THashMap<String, String> wordToPath = new THashMap<>();
    InFile in = new InFile(new FileInputStream(bcFilePath));
    try {
        for (String line = in.readLine(); line != null; line = in.readLine()) {
            StringTokenizer st = new StringTokenizer(line);
            String path = st.nextToken();
            String word = st.nextToken();
            int occ = Integer.parseInt(st.nextToken());
            if (occ >= threshold) {
                wordToPath.put(word, path);
            }
        }
    } finally {
        // release the file handle even when a malformed line aborts the parse.
        in.close();
    }
    return wordToPath;
}
Also used : InFile(edu.illinois.cs.cogcomp.ner.IO.InFile) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) FileInputStream(java.io.FileInputStream) StringTokenizer(java.util.StringTokenizer) Datastore(org.cogcomp.Datastore) THashMap(gnu.trove.map.hash.THashMap) File(java.io.File) InFile(edu.illinois.cs.cogcomp.ner.IO.InFile)

Example 2 with InvalidEndpointException

use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.

the class ModelLoader method load.

/**
 * Load the models wherever they are found. Check file system first, then classpath, and finally get it
 * from Minio datastore. When training, a model missing from both the file system and the classpath is
 * not an error: empty taggers bound to the configured model path are created instead.
 * <p>
 * The resulting taggers are stored in {@code cp.taggerLevel1} and {@code cp.taggerLevel2}; the level 2
 * tagger is only created when {@code cp.featuresToUse} contains the key "PredictionsLevel1". If the
 * datastore can not be reached the failure is logged and the taggers are left {@code null}.
 *
 * @param rm the resource manager.
 * @param viewName the name of the view identifies the model.
 * @param training if we are training.
 * @param cp the parameters for the calling model.
 * @throws IllegalArgumentException if the model is not found locally and the view name does not
 *         correspond to one of the default models.
 */
public static void load(ResourceManager rm, String viewName, boolean training, ParametersForLbjCode cp) {
    // the loader built into the model will check the local file system and the jar files in the classpath.
    String modelPath = cp.pathToModelFile;
    String modelFilePath = modelPath + ".level1";
    File modelFile = new File(modelFilePath);
    NETaggerLevel1 tagger1 = null;
    NETaggerLevel2 tagger2 = null;

    // decide where the model comes from; null means we must fall back to the datastore.
    String source = null;
    if (modelFile.exists()) {
        source = "file";
    } else if (IOUtilities.existsInClasspath(NETaggerLevel1.class, modelFilePath)) {
        source = "classpath";
    } else if (training) {
        // we are training a new model, so if it doesn't exist, we don't care, just create a
        // container.
        source = "file";
    }

    if (source != null) {
        tagger1 = new NETaggerLevel1(modelPath + ".level1", modelPath + ".level1.lex");
        logger.info("Reading L1 model from " + source + " : " + modelPath + ".level1");
        if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
            tagger2 = new NETaggerLevel2(modelPath + ".level2", modelPath + ".level2.lex");
            logger.info("Reading L2 model from " + source + " : " + modelPath + ".level2");
        } else {
            logger.info("L2 model not required.");
        }
    } else {
        // all else has failed, load from the datastore, create artifact ids based on the view
        // name and training data designation.
        String dataset;
        String lowercaseViewName = viewName.toLowerCase();
        if (lowercaseViewName.contains(ViewNames.NER_CONLL.toLowerCase())) {
            dataset = "enron-conll";
        } else if (lowercaseViewName.contains(ViewNames.NER_ONTONOTES.toLowerCase())) {
            dataset = "ontonotes";
        } else {
            // not a standard model, and we can't find it on the command line.
            throw new IllegalArgumentException("The NER models could not be found at \"" + modelPath + "\", and no default with view name " + viewName);
        }
        String data_split;
        if (!rm.containsKey(NerBaseConfigurator.TRAINED_ON)) {
            data_split = NerBaseConfigurator.TRAINED_ON_ALL_DATA;
        } else {
            data_split = rm.getString(NerBaseConfigurator.TRAINED_ON);
        }
        try {
            Datastore ds = new Datastore(new ResourceConfigurator().getConfig(rm));
            String artifact_id = "ner-model-" + dataset + "-" + data_split;
            File modelDir = ds.getDirectory("edu.illinois.cs.cogcomp.ner", artifact_id, 4.0, false);
            String model;
            if (modelDir.getPath().contains("conll")) {
                model = modelDir.getPath() + "/model/EnronCoNLL.model";
            } else {
                model = modelDir.getPath() + "/model/OntoNotes.model";
            }
            tagger1 = new NETaggerLevel1(model + ".level1", model + ".level1.lex");
            if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
                tagger2 = new NETaggerLevel2(model + ".level2", model + ".level2.lex");
            }
        } catch (InvalidPortException | DatastoreException | InvalidEndpointException e) {
            // the taggers stay null, as before; report through the logger rather than stderr.
            logger.error("Could not load the NER models for view " + viewName + " from the datastore.", e);
        }
    }
    cp.taggerLevel1 = tagger1;
    cp.taggerLevel2 = tagger2;
}
Also used : NETaggerLevel2(edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel2) NETaggerLevel1(edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel1) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) File(java.io.File) Datastore(org.cogcomp.Datastore) File(java.io.File)

Example 3 with InvalidEndpointException

use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.

the class ExtentReader method getTextAnnotations.

/**
 * Collect the text annotations of the configured corpus.
 * <ul>
 * <li>"ACE": every document read from {@code _path} gets a view added by a {@link POSAnnotator}.</li>
 * <li>"ERE": same, reading the ENR3 ERE corpus from {@code _path}.</li>
 * <li>"COMBINED-&lt;corpus&gt;-&lt;mode&gt;-&lt;fold&gt;": documents are taken from a {@link BIOCombinedReader}.</li>
 * </ul>
 * Failures while reading the ACE or ERE corpus are printed and whatever was collected so far is returned.
 *
 * @return the collected annotations, possibly empty.
 */
public List<TextAnnotation> getTextAnnotations() throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    List<TextAnnotation> annotations = new ArrayList<>();

    if (_corpus.equals("ACE")) {
        POSAnnotator tagger = new POSAnnotator();
        try {
            ACEReaderWithTrueCaseFixer reader = new ACEReaderWithTrueCaseFixer(_path, false);
            for (TextAnnotation document : reader) {
                document.addView(tagger);
                annotations.add(document);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    if (_corpus.equals("ERE")) {
        POSAnnotator tagger = new POSAnnotator();
        try {
            EREMentionRelationReader reader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, _path, false);
            for (XmlTextAnnotation xmlDocument : reader) {
                TextAnnotation document = xmlDocument.getTextAnnotation();
                document.addView(tagger);
                annotations.add(document);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    if (_corpus.startsWith("COMBINED")) {
        // the corpus name carries the real corpus, the mode and the fold, separated by dashes.
        String[] parts = _corpus.split("-");
        String realCorpus = parts[1];
        String mode = parts[2];
        int fold = Integer.parseInt(parts[3]);
        BIOCombinedReader reader = new BIOCombinedReader(fold, realCorpus + "-" + mode, "ALL", true);
        Object next = reader.next();
        while (next != null) {
            annotations.add((TextAnnotation) next);
            next = reader.next();
        }
    }

    return annotations;
}
Also used : EREMentionRelationReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader) ACEReaderWithTrueCaseFixer(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReaderWithTrueCaseFixer) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ArrayList(java.util.ArrayList) DatastoreException(org.cogcomp.DatastoreException) JWNLException(net.didion.jwnl.JWNLException) IOException(java.io.IOException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) InvalidPortException(io.minio.errors.InvalidPortException)

Example 4 with InvalidEndpointException

use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.

the class SenseManager method getLegalSensesMap.

/**
 * Build the map from predicate to its set of legal senses, read from the "sense-list.txt" file
 * fetched from the datastore. Each line of that file holds a predicate and a comma separated
 * list of senses, separated by a tab.
 *
 * @return the map from predicate to legal senses.
 * @throws IllegalStateException if the sense list can not be fetched from the datastore.
 */
private Map<String, Set<String>> getLegalSensesMap() {
    File senseFile;
    try {
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        senseFile = ds.getFile("edu.illinois.cs.cogcomp.verbsense", "sense-list.txt", 1.0, false);
    } catch (InvalidPortException | InvalidEndpointException | DatastoreException e) {
        // without the file there is nothing to read; fail here with the cause attached instead of
        // running into a NullPointerException on the missing file below.
        throw new IllegalStateException("Unable to fetch the list of legal senses from the datastore.", e);
    }
    Map<String, Set<String>> map = new HashMap<>();
    try {
        for (String line : LineIO.read(senseFile.getAbsolutePath())) {
            String[] fields = line.split("\t");
            String predicate = fields[0];
            String[] senseArray = fields[1].split(",");
            Set<String> senseSet = new HashSet<>(Arrays.asList(senseArray));
            map.put(predicate, senseSet);
        }
    } catch (FileNotFoundException e) {
        log.error("Unable to load list of legal senses: ", e);
        System.exit(-1);
    }
    return map;
}
Also used : FileNotFoundException(java.io.FileNotFoundException) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) Datastore(org.cogcomp.Datastore) File(java.io.File)

Example 5 with InvalidEndpointException

use of io.minio.errors.InvalidEndpointException in project cogcomp-nlp by CogComp.

the class QuestionTypeAnnotator method initialize.

/**
 * Fetch the question typer models directory from the datastore, remember its location in
 * {@code modelsFolder}, and create the fine and coarse question typers from the model and lexicon
 * files below that folder. A datastore failure is printed and the typers are still created from
 * whatever {@code modelsFolder} holds at that point.
 *
 * @param rm the resource manager; not read by this method.
 */
@Override
public void initialize(ResourceManager rm) {
    System.out.println("loading . . . ");
    try {
        Datastore datastore = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File downloadedDir = datastore.getDirectory("org.cogcomp.question-typer", "question-typer-models", 1.0, false);
        String downloadedPath = downloadedDir.getPath();
        this.modelsFolder = downloadedPath + "/question-typer-models/";
        System.out.println(modelsFolder + "QuestionFineTyper.lc");
    } catch (InvalidEndpointException | InvalidPortException | DatastoreException e) {
        e.printStackTrace();
    }
    // model (.lc) and lexicon (.lex) share the same base name per typer.
    String fineBase = modelsFolder + "QuestionFineTyper";
    String coarseBase = modelsFolder + "QuestionCoarseTyper";
    fine = new QuestionFineTyper(fineBase + ".lc", fineBase + ".lex");
    coarse = new QuestionCoarseTyper(coarseBase + ".lc", coarseBase + ".lex");
}
Also used : Datastore(org.cogcomp.Datastore) QuestionFineTyper(edu.illinois.cs.cogcomp.question_typer.lbjava.QuestionFineTyper) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) File(java.io.File) InvalidEndpointException(io.minio.errors.InvalidEndpointException) QuestionCoarseTyper(edu.illinois.cs.cogcomp.question_typer.lbjava.QuestionCoarseTyper)

Aggregations

InvalidEndpointException (io.minio.errors.InvalidEndpointException)12 InvalidPortException (io.minio.errors.InvalidPortException)12 DatastoreException (org.cogcomp.DatastoreException)12 Datastore (org.cogcomp.Datastore)11 ResourceConfigurator (edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator)10 File (java.io.File)9 IOException (java.io.IOException)5 ArrayList (java.util.ArrayList)4 FileNotFoundException (java.io.FileNotFoundException)3 JWNLException (net.didion.jwnl.JWNLException)3 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)2 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)2 WordNetManager (edu.illinois.cs.cogcomp.edison.utilities.WordNetManager)2 BrownClusters (edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.BrownClusters)2 FlatGazetteers (edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.FlatGazetteers)2 StringSplitterInterface (edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.GazetteerTree.StringSplitterInterface)2 Gazetteers (edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.Gazetteers)2 InFile (edu.illinois.cs.cogcomp.ner.IO.InFile)2 POSAnnotator (edu.illinois.cs.cogcomp.pos.POSAnnotator)2 THashMap (gnu.trove.map.hash.THashMap)2