Use of org.deeplearning4j.models.sequencevectors.serialization.VocabWordFactory in project deeplearning4j by deeplearning4j:
the method readVocabCache of the class WordVectorSerializer.
/**
 * Reads a vocab cache from the provided InputStream.
 * <p>
 * The stream is decoded as UTF-8 and consumed line by line until end of input. Every line
 * is handed to {@code VocabWordFactory.deserialize}, and the resulting word is added to the
 * cache as a token and registered under the index it carries.
 * <p>
 * Please note: it reads only vocab content, so it's suitable mostly for
 * BagOfWords/TF-IDF vectorizers.
 * <p>
 * This method does not close the stream; that remains the caller's job.
 *
 * @param stream source of the serialized vocabulary, one word per line; must not be null
 * @return a newly built cache holding every word read from the stream
 * @throws IOException if reading from the stream fails
 */
public static VocabCache<VocabWord> readVocabCache(@NonNull InputStream stream) throws IOException {
    // The charset constant avoids the by-name lookup (and its checked
    // UnsupportedEncodingException) that the "UTF-8" string form goes through.
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8));
    AbstractCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();
    VocabWordFactory factory = new VocabWordFactory();

    String line;
    while ((line = reader.readLine()) != null) {
        VocabWord word = factory.deserialize(line);
        vocabCache.addToken(word);
        // Keep the index -> label mapping in sync with the token that was just added.
        vocabCache.addWordToIndex(word.getIndex(), word.getLabel());
    }
    return vocabCache;
}
Aggregations