Search in sources :

Example 1 with DenseVector

use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.

the class MemoryBasedW2V method loadVectors.

/**
 * Lazily loads the Word2Vec embeddings into {@code vectors}.
 *
 * Does nothing when {@code vectors} is already populated. Otherwise the embedding file
 * returned by {@code getFile()} is read line by line: the first line must follow the
 * schema {@code #Terms #Vector_Dimensions}, and every following non-blank line must be a
 * term followed by exactly that many space-separated numbers.
 *
 * The parsed entries are collected in a local map and only published to {@code vectors}
 * once the whole file has been read successfully, so a failed load never leaves a
 * partially filled map behind that later calls would mistake for a complete one.
 *
 * @throws RuntimeException if the file cannot be retrieved from the DataStore, cannot be
 *         read, or is malformed (empty, bad header, dimension mismatch); the original
 *         exception is attached as the cause
 */
private void loadVectors() {
    if (vectors != null) {
        return;
    }
    File inputFile;
    try {
        inputFile = getFile();
    } catch (DatastoreException e) {
        logger.error("Error retrieving the embedding file from DataStore", e);
        throw new RuntimeException("Error retrieving the embedding file from DataStore", e);
    }
    // Parse into a local map first; see the method comment for why.
    HashMap<String, DenseVector> loaded = new HashMap<>();
    // try-with-resources closes the reader on every exit path, including exceptions.
    try (BufferedReader bf = new BufferedReader(new FileReader(inputFile))) {
        logger.info("Reading Word2vec Embeddings from " + inputFile.getAbsolutePath());
        String header = bf.readLine();
        if (header == null) {
            throw new IllegalStateException("The embeddings file is empty: " + inputFile.getAbsolutePath());
        }
        // The first line has the following schema --> #Terms #Vector_Dimensions
        String[] headerTokens = header.split(" ");
        if (headerTokens.length < 2) {
            throw new IllegalStateException("Malformed header in the embeddings file (expected \"#Terms #Vector_Dimensions\"): " + header);
        }
        int dimNum = Integer.parseInt(headerTokens[1].trim());
        if (dimNum != dimensions) {
            throw new IllegalStateException("Number of dimensions in the embeddings file (" + dimNum + ") don't match the one in the config file (" + dimensions + ")");
        }
        int count = 0;
        String line;
        while ((line = bf.readLine()) != null) {
            line = line.trim();
            if (line.isEmpty()) {
                continue;
            }
            // Split into the term and the remainder holding the vector components.
            String[] tokens = line.split(" ", 2);
            if (tokens.length < 2) {
                // A term without any vector components.
                throw new IllegalStateException("Possible Error in the embeddings file -- number of dimensions(" + dimNum + ") don't match -->" + line);
            }
            String[] stringVec = tokens[1].split(" ");
            if (stringVec.length != dimNum) {
                throw new IllegalStateException("Possible Error in the embeddings file -- number of dimensions(" + dimNum + ") don't match -->" + tokens[1]);
            }
            // The line was trimmed and is non-empty, so the term itself cannot be empty.
            String word = tokens[0];
            double[] scores = new double[dimNum];
            for (int i = 0; i < dimNum; i++) {
                scores[i] = Double.parseDouble(stringVec[i]);
            }
            loaded.put(word, new DenseVector(scores));
            count++;
            if (count % 100000 == 0) {
                logger.info("#W2V embeddings read: " + count);
            }
        }
    } catch (IOException e) {
        logger.error("IO Error while reading the W2V Embedding File", e);
        throw new RuntimeException("IO Error while reading the W2V Embedding File", e);
    } catch (IllegalStateException e) {
        logger.error(e.getMessage(), e);
        throw new RuntimeException(e.getMessage(), e);
    }
    vectors = loaded;
}
Also used : DatastoreException(org.cogcomp.DatastoreException) DenseVector(edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector)

Example 2 with DenseVector

use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.

the class MemoryBasedW2V method getDenseVectorBasedOnTermWeights.

/**
 * Builds the sparse concept vector for the given term weights via
 * {@code getConceptVectorBasedOnTermWeights} and converts it to a {@link DenseVector}.
 *
 * @param termWeights the term-to-weight map forwarded unchanged to
 *        {@code getConceptVectorBasedOnTermWeights}
 * @return the result of {@code DenseVector.createDenseVector} on that sparse vector
 */
public DenseVector getDenseVectorBasedOnTermWeights(HashMap<String, Double> termWeights) {
    return DenseVector.createDenseVector(getConceptVectorBasedOnTermWeights(termWeights));
}
Also used : DenseVector(edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector)

Example 3 with DenseVector

use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.

the class MemoryBasedW2V method getDenseTermVector.

/**
 * Looks up the sparse concept vector for a term via {@code getTermConceptVectorMap}
 * and converts it to a {@link DenseVector}.
 *
 * @param term the term forwarded unchanged to {@code getTermConceptVectorMap}
 * @return the result of {@code DenseVector.createDenseVector} on that sparse vector
 */
public DenseVector getDenseTermVector(String term) {
    return DenseVector.createDenseVector(getTermConceptVectorMap(term));
}
Also used : DenseVector(edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector)

Example 4 with DenseVector

use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.

the class MemoryBasedW2V method getDenseVectorBasedOnSegmentation.

/**
 * Dense counterpart of {@code getConceptVectorBasedOnSegmentation(query, ignoreTermFreq)}:
 * obtains the sparse concept vector for the query and converts it to a {@link DenseVector}.
 *
 * @param query the query text forwarded unchanged to the sparse variant
 * @param ignoreTermFreq forwarded unchanged to the sparse variant; switches term
 *        frequency weighting on or off while the term vectors are composed
 * @return the result of {@code DenseVector.createDenseVector} on that sparse vector
 */
public DenseVector getDenseVectorBasedOnSegmentation(String query, boolean ignoreTermFreq) {
    return DenseVector.createDenseVector(getConceptVectorBasedOnSegmentation(query, ignoreTermFreq));
}
Also used : DenseVector(edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector)

Example 5 with DenseVector

use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.

the class MemoryBasedW2V method getDefaultDenseTermVector.

/**
 * Obtains the default sparse concept vector via {@code getDefaultConceptVectorMap}
 * and converts it to a {@link DenseVector}.
 *
 * @return the result of {@code DenseVector.createDenseVector} on that sparse vector
 */
public DenseVector getDefaultDenseTermVector() {
    return DenseVector.createDenseVector(getDefaultConceptVectorMap());
}
Also used : DenseVector(edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector)

Aggregations

DenseVector (edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector): 5, DatastoreException (org.cogcomp.DatastoreException): 1