use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.
the class MemoryBasedW2V method loadVectors.
/**
 * Lazily loads the Word2Vec embeddings into {@code vectors}.
 *
 * <p>Does nothing when {@code vectors} is already set. Otherwise the file returned by
 * {@code getFile()} is parsed: the first line is a header with the schema
 * {@code #Terms #Vector_Dimensions}, and every following non-blank line holds a term followed
 * by its space-separated vector components.
 *
 * <p>The map is built locally and only assigned to {@code vectors} after the whole file has
 * been parsed, so a failed load never leaves a partially filled map behind (which would
 * otherwise make every later call skip loading).
 *
 * @throws RuntimeException if the file cannot be retrieved or read, if the header is missing
 *         or malformed, if the header's dimension count differs from {@code dimensions}, or
 *         if a line does not carry exactly that many components
 */
private void loadVectors() {
    if (vectors != null) {
        return;
    }

    File inputFile;
    try {
        inputFile = getFile();
    } catch (DatastoreException e) {
        logger.error("Error retrieving the embedding file from DataStore", e);
        throw new RuntimeException("Error retrieving the embedding file from DataStore", e);
    }

    // try-with-resources closes the reader on every exit path; no explicit close() is needed.
    try (BufferedReader bf = new BufferedReader(new FileReader(inputFile))) {
        logger.info("Reading Word2vec Embeddings from " + inputFile.getAbsolutePath());

        HashMap<String, DenseVector> loaded = new HashMap<>();

        // The first line has the following schema --> #Terms #Vector_Dimensions
        String line = bf.readLine();
        if (line == null) {
            throw new IllegalStateException(
                    "The embeddings file is empty: " + inputFile.getAbsolutePath());
        }

        String[] tokens = line.trim().split(" ");
        if (tokens.length < 2) {
            throw new IllegalStateException(
                    "Malformed header in the embeddings file (expected '#Terms #Vector_Dimensions'): "
                            + line);
        }

        int dimNum = Integer.parseInt(tokens[1].trim());
        if (dimNum != dimensions) {
            throw new IllegalStateException("Number of dimensions in the embeddings file (" + dimNum + ") don't match the one in the config file (" + dimensions + ")");
        }

        int count = 0;
        while ((line = bf.readLine()) != null) {
            line = line.trim();
            if (line.length() == 0) {
                continue;
            }

            // Split into the term and the remainder holding the vector components.
            tokens = line.split(" ", 2);
            if (tokens.length < 2) {
                throw new IllegalStateException(
                        "Possible Error in the embeddings file -- no vector components found -->"
                                + line);
            }

            String[] stringVec = tokens[1].split(" ");
            if (stringVec.length != dimNum) {
                throw new IllegalStateException("Possible Error in the embeddings file -- number of dimensions(" + dimNum + ") don't match -->" + tokens[1]);
            }

            String word = tokens[0].trim();
            if (word.length() == 0) {
                continue;
            }

            double[] scores = new double[dimNum];
            for (int i = 0; i < dimNum; i++) {
                scores[i] = Double.parseDouble(stringVec[i]);
            }

            loaded.put(word, new DenseVector(scores));
            count++;
            if (count % 100000 == 0) {
                logger.info("#W2V embeddings read: " + count);
            }
        }

        // Publish only after the complete file was parsed successfully.
        vectors = loaded;
    } catch (IOException e) {
        logger.error("IO Error while reading the W2V Embedding File", e);
        throw new RuntimeException("IO Error while reading the W2V Embedding File", e);
    } catch (IllegalStateException e) {
        logger.error(e.getMessage(), e);
        throw new RuntimeException(e.getMessage(), e);
    }
}
use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.
the class MemoryBasedW2V method getDenseVectorBasedOnTermWeights.
/**
 * Builds the dense form of the concept vector computed for the given term weights.
 *
 * @param termWeights the term-to-weight map handed to
 *        {@code getConceptVectorBasedOnTermWeights}
 * @return the result of {@code DenseVector.createDenseVector} applied to the SparseVector
 *         returned by {@code getConceptVectorBasedOnTermWeights}
 */
public DenseVector getDenseVectorBasedOnTermWeights(HashMap<String, Double> termWeights) {
    return DenseVector.createDenseVector(getConceptVectorBasedOnTermWeights(termWeights));
}
use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.
the class MemoryBasedW2V method getDenseTermVector.
/**
 * Builds the dense form of the concept vector of a single term.
 *
 * @param term the term handed to {@code getTermConceptVectorMap}
 * @return the result of {@code DenseVector.createDenseVector} applied to the SparseVector
 *         returned by {@code getTermConceptVectorMap}
 */
public DenseVector getDenseTermVector(String term) {
    return DenseVector.createDenseVector(getTermConceptVectorMap(term));
}
use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.
the class MemoryBasedW2V method getDenseVectorBasedOnSegmentation.
/**
 * Overload of getDenseVectorBasedOnSegmentation that lets the caller switch term frequency
 * weighting on or off while the term vectors are composed.
 *
 * @param query the query handed to {@code getConceptVectorBasedOnSegmentation}
 * @param ignoreTermFreq flag forwarded unchanged to
 *        {@code getConceptVectorBasedOnSegmentation}
 * @return the result of {@code DenseVector.createDenseVector} applied to the SparseVector
 *         returned by {@code getConceptVectorBasedOnSegmentation}
 */
public DenseVector getDenseVectorBasedOnSegmentation(String query, boolean ignoreTermFreq) {
    return DenseVector.createDenseVector(
            getConceptVectorBasedOnSegmentation(query, ignoreTermFreq));
}
use of edu.illinois.cs.cogcomp.datalessclassification.util.DenseVector in project cogcomp-nlp by CogComp.
the class MemoryBasedW2V method getDefaultDenseTermVector.
/**
 * Builds the dense form of the default concept vector.
 *
 * @return the result of {@code DenseVector.createDenseVector} applied to the SparseVector
 *         returned by {@code getDefaultConceptVectorMap}
 */
public DenseVector getDefaultDenseTermVector() {
    return DenseVector.createDenseVector(getDefaultConceptVectorMap());
}
Aggregations