Search in sources :

Example 6 with DatastoreException

use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.

the class ModelLoader method load.

/**
 * Load the NER models from wherever they are found. The local file system is checked first,
 * then the classpath. When training, an empty model container is created if neither has the
 * model. Otherwise the model is fetched from the Minio datastore.
 * <p>
 * On return {@code cp.taggerLevel1} and {@code cp.taggerLevel2} are overwritten with the
 * result. The level 2 tagger is only created when {@code cp.featuresToUse} contains the key
 * {@code "PredictionsLevel1"}; otherwise it is set to {@code null}. If the datastore lookup
 * fails, the stack trace is printed and whatever was not loaded stays {@code null}.
 *
 * @param rm the resource manager.
 * @param viewName the name of the view, which identifies the model.
 * @param training if we are training.
 * @param cp the parameters for the calling model.
 * @throws IllegalArgumentException if the model is found neither on disk nor on the classpath,
 *         we are not training, and the view name matches no default datastore model.
 */
public static void load(ResourceManager rm, String viewName, boolean training, ParametersForLbjCode cp) {
    // the loader built into the model will check the local file system and the jar files in the classpath.
    String modelPath = cp.pathToModelFile;
    String modelFilePath = modelPath + ".level1";
    java.io.File modelFile = new File(modelFilePath);
    NETaggerLevel1 tagger1 = null;
    NETaggerLevel2 tagger2 = null;
    if (modelFile.exists()) {
        tagger1 = new NETaggerLevel1(modelPath + ".level1", modelPath + ".level1.lex");
        logger.info("Reading L1 model from file : " + modelPath + ".level1");
        if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
            tagger2 = new NETaggerLevel2(modelPath + ".level2", modelPath + ".level2.lex");
            logger.info("Reading L2 model from file : " + modelPath + ".level2");
        } else {
            logger.info("L2 model not required.");
        }
    } else if (IOUtilities.existsInClasspath(NETaggerLevel1.class, modelFilePath)) {
        tagger1 = new NETaggerLevel1(modelPath + ".level1", modelPath + ".level1.lex");
        logger.info("Reading L1 model from classpath : " + modelPath + ".level1");
        if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
            tagger2 = new NETaggerLevel2(modelPath + ".level2", modelPath + ".level2.lex");
            logger.info("Reading L2 model from classpath : " + modelPath + ".level2");
        } else {
            logger.info("L2 model not required.");
        }
    } else if (training) {
        // we are training a new model, so if it doesn't exist, we don't care, just create a
        // container.
        tagger1 = new NETaggerLevel1(modelPath + ".level1", modelPath + ".level1.lex");
        logger.info("Reading L1 model from file : " + modelPath + ".level1");
        if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
            tagger2 = new NETaggerLevel2(modelPath + ".level2", modelPath + ".level2.lex");
            logger.info("Reading L2 model from file : " + modelPath + ".level2");
        } else {
            logger.info("L2 model not required.");
        }
    } else {
        // all else has failed, load from the datastore, create artifact ids based on the view
        // name and training data designation.
        String dataset;
        String lowercaseViewName = viewName.toLowerCase();
        if (lowercaseViewName.contains(ViewNames.NER_CONLL.toLowerCase())) {
            dataset = "enron-conll";
        } else if (lowercaseViewName.contains(ViewNames.NER_ONTONOTES.toLowerCase())) {
            dataset = "ontonotes";
        } else {
            // not a standard model, and we can't find it on the command line.
            throw new IllegalArgumentException("The NER models could not be found at \"" + modelPath + "\", and no default with view name " + viewName);
        }
        String data_split;
        if (!rm.containsKey(NerBaseConfigurator.TRAINED_ON))
            data_split = NerBaseConfigurator.TRAINED_ON_ALL_DATA;
        else
            data_split = rm.getString(NerBaseConfigurator.TRAINED_ON);
        try {
            Datastore ds = new Datastore(new ResourceConfigurator().getConfig(rm));
            String artifact_id = "ner-model-" + dataset + "-" + data_split;
            File modelDir = ds.getDirectory("edu.illinois.cs.cogcomp.ner", artifact_id, 4.0, false);
            // Pick the model file from the dataset resolved above rather than by searching the
            // local directory path for "conll": an unrelated path component (e.g. a user or
            // cache directory name) could otherwise select the wrong model file.
            String model;
            if ("enron-conll".equals(dataset)) {
                model = modelDir.getPath() + "/model/EnronCoNLL.model";
            } else {
                model = modelDir.getPath() + "/model/OntoNotes.model";
            }
            tagger1 = new NETaggerLevel1(model + ".level1", model + ".level1.lex");
            if (cp.featuresToUse.containsKey("PredictionsLevel1")) {
                tagger2 = new NETaggerLevel2(model + ".level2", model + ".level2.lex");
            }
        } catch (InvalidPortException | DatastoreException | InvalidEndpointException e) {
            // Best effort: the failure is reported but not propagated, so the taggers stored
            // below may be null and callers must be prepared for that.
            e.printStackTrace();
        }
    }
    cp.taggerLevel1 = tagger1;
    cp.taggerLevel2 = tagger2;
}
Also used : NETaggerLevel2(edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel2) NETaggerLevel1(edu.illinois.cs.cogcomp.ner.LbjFeatures.NETaggerLevel1) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) File(java.io.File) Datastore(org.cogcomp.Datastore) File(java.io.File)

Example 7 with DatastoreException

use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.

the class BrownClusters method init.

/**
 * Initialize the brown cluster data. The cluster directory is fetched from the datastore, then
 * the singleton is rebuilt while holding {@code INIT_SYNC}.
 * <p>
 * Each line of a cluster file is read as three whitespace-separated tokens: the cluster path,
 * the word, and an occurrence count. A word is kept only when its count is at least the
 * threshold given for that file. Datastore and file-not-found failures are printed and
 * swallowed.
 *
 * @param pathsToClusterFiles the files containing the data, relative to the fetched directory.
 * @param thresholds the minimum occurrence count for each file, indexed like
 *        {@code pathsToClusterFiles}.
 * @param isLowercaseBrownClusters one flag per file, indexed like {@code pathsToClusterFiles},
 *        stored as-is on the singleton.
 */
public static void init(Vector<String> pathsToClusterFiles, Vector<Integer> thresholds, Vector<Boolean> isLowercaseBrownClusters) {
    try {
        Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazDirectory = dsNoCredentials.getDirectory("org.cogcomp.brown-clusters", "brown-clusters", 1.5, false);
        synchronized (INIT_SYNC) {
            brownclusters = new BrownClusters();
            brownclusters.isLowercaseBrownClustersByResource = new boolean[isLowercaseBrownClusters.size()];
            brownclusters.wordToPathByResource = new ArrayList<>();
            brownclusters.resources = new ArrayList<>();
            for (int i = 0; i < pathsToClusterFiles.size(); i++) {
                THashMap<String, String> h = new THashMap<>();
                // We used to access the files as resources. Now we are accessing them programmatically.
                InputStream is = new FileInputStream(gazDirectory.getPath() + File.separator + pathsToClusterFiles.elementAt(i));
                InFile in = new InFile(is);
                int wordsAdded = 0;
                try {
                    String line = in.readLine();
                    while (line != null) {
                        StringTokenizer st = new StringTokenizer(line);
                        String path = st.nextToken();
                        String word = st.nextToken();
                        int occ = Integer.parseInt(st.nextToken());
                        if (occ >= thresholds.elementAt(i)) {
                            h.put(word, path);
                            wordsAdded++;
                        }
                        line = in.readLine();
                    }
                } finally {
                    // Close the reader even when a malformed line makes parsing throw;
                    // previously the file handle leaked on that path.
                    in.close();
                }
                if (ParametersForLbjCode.currentParameters.debug) {
                    logger.info(wordsAdded + " words added");
                }
                brownclusters.wordToPathByResource.add(h);
                brownclusters.isLowercaseBrownClustersByResource[i] = isLowercaseBrownClusters.elementAt(i);
                brownclusters.resources.add(pathsToClusterFiles.elementAt(i));
            }
        }
    } catch (InvalidPortException | InvalidEndpointException | DatastoreException | FileNotFoundException e) {
        // Best effort: the failure is reported but not propagated to the caller.
        e.printStackTrace();
    }
}
Also used : InFile(edu.illinois.cs.cogcomp.ner.IO.InFile) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) FileInputStream(java.io.FileInputStream) StringTokenizer(java.util.StringTokenizer) Datastore(org.cogcomp.Datastore) THashMap(gnu.trove.map.hash.THashMap) File(java.io.File) InFile(edu.illinois.cs.cogcomp.ner.IO.InFile)

Example 8 with DatastoreException

use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.

the class TreeGazetteers method init.

/**
 * Init all the gazetteers, mangle each term in a variety of ways.
 * <p>
 * The gazetteer directory is fetched from the datastore; the file
 * {@code gazetteers/gazetteers-list.txt} inside it names the dictionaries to read. Every
 * dictionary is compiled into two trees, one case sensitive and one that ignores case, which
 * are then added to {@code dictionaries} and {@code dictionariesIgnoreCase}. Datastore failures
 * are printed and swallowed.
 *
 * @param phrase_length the max length of the phrases we will consider.
 * @param pathToDictionaries not read by this method; the resources come from the datastore.
 * @throws IOException if the gazetteer list or a dictionary cannot be read.
 */
private void init(int phrase_length, String pathToDictionaries) throws IOException {
    try {
        ArrayList<String> filenames = new ArrayList<>();
        Datastore dsNoCredentials = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazDirectory = dsNoCredentials.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.5, false);
        // We are not loading the resources from classpath anymore. Instead we are calling them programmatically
        String listPath = gazDirectory.getPath() + File.separator + "gazetteers" + File.separator + "gazetteers-list.txt";
        // try-with-resources so the list file is closed even if reading fails; it was
        // previously never closed.
        try (InputStream stream = new FileInputStream(listPath);
                BufferedReader br = new BufferedReader(new InputStreamReader(stream))) {
            String line;
            while ((line = br.readLine()) != null) {
                filenames.add(line);
            }
        }
        // init the dictionaries.
        dictionaries = new ArrayList<>(filenames.size());
        dictionariesIgnoreCase = new ArrayList<>(filenames.size());
        GazetteerTree gaz = new GazetteerTree(phrase_length);
        GazetteerTree gazIC = new GazetteerTree(phrase_length, new StringSplitterInterface() {

            @Override
            public String[] split(String line) {
                String tmp = line.toLowerCase();
                // these short words are dropped from the ignore-case tree
                if (tmp.equals("in") || tmp.equals("on") || tmp.equals("us") || tmp.equals("or") || tmp.equals("am"))
                    return new String[0];
                else {
                    // character tokenization for Chinese
                    if (ParametersForLbjCode.currentParameters.language == Language.Chinese) {
                        String[] chars = new String[line.length()];
                        for (int i = 0; i < line.length(); i++) chars[i] = String.valueOf(line.charAt(i));
                        return chars;
                    } else
                        return normalize(line).split("[\\s]+");
                }
            }

            @Override
            public String normalize(String term) {
                return term.toLowerCase();
            }
        });
        // for each dictionary, compile each of the gaz trees for each phrase permutation.
        for (String file : filenames) {
            String fileName = gazDirectory.getAbsolutePath() + File.separator + file;
            gaz.readDictionary(file, "", ResourceUtilities.loadResource(fileName));
            gazIC.readDictionary(file, "(IC)", ResourceUtilities.loadResource(fileName));
        }
        gaz.trimToSize();
        gazIC.trimToSize();
        dictionaries.add(gaz);
        dictionariesIgnoreCase.add(gazIC);
        if (ParametersForLbjCode.currentParameters.debug) {
            logger.info("found " + dictionaries.size() + " gazetteers");
        }
    } catch (InvalidPortException | InvalidEndpointException | DatastoreException e) {
        // Best effort: the failure is reported but not propagated to the caller.
        e.printStackTrace();
    }
}
Also used : ArrayList(java.util.ArrayList) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) Datastore(org.cogcomp.Datastore) StringSplitterInterface(edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.GazetteerTree.StringSplitterInterface)

Example 9 with DatastoreException

use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.

the class TransliterationAnnotator method initialize.

/**
 * Fetch the transliteration models from the datastore and load the probability file for this
 * annotator's language. If the file is missing an error is logged and no model is loaded.
 * Datastore and I/O failures are printed and swallowed.
 *
 * @param rm not read by this method; the default datastore configuration is used.
 */
@Override
public void initialize(ResourceManager rm) {
    try {
        Datastore datastore = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File modelRoot = datastore.getDirectory("org.cogcomp.transliteration", "transliteration-models", 1.3, false);
        String probsFileName = "probs-" + lang.getCode() + ".txt";
        String probsPath = String.join(File.separator, modelRoot.getAbsolutePath(), "transliteration-models-oct-2017", probsFileName);
        File probsFile = new File(probsPath);
        if (!probsFile.exists()) {
            logger.error("Model for language: " + lang + " don't exist: " + probsPath);
            return;
        }
        logger.info("Loading transliteration models for language: " + lang + " from " + probsPath);
        model = new SPModel(probsPath);
        // keep only the single best candidate
        model.setMaxCandidates(1);
    } catch (IOException | InvalidEndpointException | DatastoreException | InvalidPortException e) {
        e.printStackTrace();
    }
}
Also used : Datastore(org.cogcomp.Datastore) SPModel(edu.illinois.cs.cogcomp.transliteration.SPModel) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) IOException(java.io.IOException) DatastoreException(org.cogcomp.DatastoreException) InvalidPortException(io.minio.errors.InvalidPortException) File(java.io.File) InvalidEndpointException(io.minio.errors.InvalidEndpointException)

Example 10 with DatastoreException

use of org.cogcomp.DatastoreException in project cogcomp-nlp by CogComp.

the class WordSim method getFile.

/**
 * Fetch the embedding resource for the given method from the datastore.
 * <p>
 * The {@code ds} field is re-created from the default configuration's
 * {@code "datastoreEndpoint"} on every call. If that fails, the stack trace is printed and a
 * previously created datastore, when there is one, is used instead.
 *
 * @param method one of the {@code EmbeddingConstant} method identifiers.
 * @return the fetched file, or {@code null} if the method is not recognised, no datastore is
 *         available, or the download fails.
 */
public File getFile(String method) {
    try {
        ResourceManager rm = new ResourceConfigurator().getDefaultConfig();
        ds = new Datastore(rm.getString("datastoreEndpoint"));
    } catch (DatastoreException e1) {
        e1.printStackTrace();
    }
    // Map the method to its resource name once, instead of repeating the fetch per branch.
    String fileName;
    if (method.equals(EmbeddingConstant.word2vec)) {
        fileName = "word2vec.txt";
    } else if (method.equals(EmbeddingConstant.glove)) {
        fileName = "glove.txt";
    } else if (method.equals(EmbeddingConstant.phrase2vec)) {
        fileName = "phrase2vec.txt";
    } else if (method.equals(EmbeddingConstant.memorybasedESA)) {
        fileName = "memorybasedESA.txt";
    } else if (method.equals(EmbeddingConstant.pageIDMapping)) {
        fileName = "pageIDMapping.txt";
    } else {
        return null;
    }
    if (ds == null) {
        // Datastore creation failed above and none exists from an earlier call; return null
        // like the other failure paths rather than hitting a NullPointerException.
        return null;
    }
    try {
        return ds.getFile("org.cogcomp.wordembedding", fileName, 1.5);
    } catch (DatastoreException e) {
        e.printStackTrace();
        return null;
    }
}
Also used : Datastore(org.cogcomp.Datastore) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) DatastoreException(org.cogcomp.DatastoreException) File(java.io.File)

Aggregations

DatastoreException (org.cogcomp.DatastoreException)14 InvalidEndpointException (io.minio.errors.InvalidEndpointException)12 InvalidPortException (io.minio.errors.InvalidPortException)12 Datastore (org.cogcomp.Datastore)12 ResourceConfigurator (edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator)11 File (java.io.File)10 IOException (java.io.IOException)5 ArrayList (java.util.ArrayList)4 FileNotFoundException (java.io.FileNotFoundException)3 JWNLException (net.didion.jwnl.JWNLException)3 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)2 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)2 WordNetManager (edu.illinois.cs.cogcomp.edison.utilities.WordNetManager)2 BrownClusters (edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.BrownClusters)2 FlatGazetteers (edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.FlatGazetteers)2 StringSplitterInterface (edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.GazetteerTree.StringSplitterInterface)2 Gazetteers (edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.Gazetteers)2 InFile (edu.illinois.cs.cogcomp.ner.IO.InFile)2 POSAnnotator (edu.illinois.cs.cogcomp.pos.POSAnnotator)2 THashMap (gnu.trove.map.hash.THashMap)2