Use of org.deeplearning4j.models.embeddings.wordvectors.WordVectors in project deeplearning4j by deeplearning4j.
Class WordVectorSerializerTest, method testLoaderStream.
/**
 * Loads word vectors from {@code textFile} through an input stream and checks
 * the resulting vocabulary: it must report 30 words and contain the tokens
 * "Morgan_Freeman" and "JA_Montalbano".
 *
 * @throws IOException if the file cannot be opened or read
 */
@Test
public void testLoaderStream() throws IOException {
    // try-with-resources guarantees the file handle is released even when
    // loading or one of the assertions fails.
    try (FileInputStream in = new FileInputStream(textFile)) {
        WordVectors vec = WordVectorSerializer.loadTxtVectors(in, true);
        // JUnit convention: expected value first, actual value second.
        assertEquals(30, vec.vocab().numWords());
        assertTrue(vec.vocab().hasToken("Morgan_Freeman"));
        assertTrue(vec.vocab().hasToken("JA_Montalbano"));
    }
}
Use of org.deeplearning4j.models.embeddings.wordvectors.WordVectors in project deeplearning4j by deeplearning4j.
Class WordVectorSerializerTest, method testLoaderText.
/**
 * Loads {@code textFile} via {@code WordVectorSerializer.loadGoogleModel}
 * with the second argument set to {@code false} and checks the resulting
 * vocabulary: it must report 30 words and contain the tokens
 * "Morgan_Freeman" and "JA_Montalbano".
 *
 * @throws IOException if the model file cannot be read
 */
@Test
public void testLoaderText() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(textFile, false);
    // JUnit convention: expected value first, actual value second, so a
    // failure message reports the values the right way round.
    assertEquals(30, vec.vocab().numWords());
    assertTrue(vec.vocab().hasToken("Morgan_Freeman"));
    assertTrue(vec.vocab().hasToken("JA_Montalbano"));
}
Use of org.deeplearning4j.models.embeddings.wordvectors.WordVectors in project deeplearning4j by deeplearning4j.
Class WordVectorSerializerTest, method testOutputStream.
/**
 * Round-trips word vectors through an output stream.
 *
 * <p>Builds and fits a {@code Word2Vec} instance over the classpath resource
 * {@code /big/raw_sentences.txt}, writes its vectors to a temporary file via
 * a {@code FileOutputStream}, reloads them with {@code loadTxtVectors}, and
 * asserts that the vector for "day" is equal before and after. Finally the
 * full model is written with {@code writeWord2VecModel} and read back with
 * {@code readWord2VecModel}; that last step only checks that no exception
 * is thrown.
 *
 * @throws Exception if the resource cannot be resolved, or writing/reading fails
 */
@Test
public void testOutputStream() throws Exception {
    File file = File.createTempFile("tmp_ser", "ssa");
    file.deleteOnExit();

    File inputFile = new ClassPathResource("/big/raw_sentences.txt").getFile();
    SentenceIterator iter = new BasicLineIterator(inputFile);

    // Split on white spaces in the line to get words
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    // The same cache instance is shared by the lookup table and the model builder.
    InMemoryLookupCache cache = new InMemoryLookupCache(false);
    WeightLookupTable table = new InMemoryLookupTable.Builder()
            .vectorLength(100)
            .useAdaGrad(false)
            .negative(5.0)
            .cache(cache)
            .lr(0.025f)
            .build();

    Word2Vec vec = new Word2Vec.Builder()
            .minWordFrequency(5)
            .iterations(1)
            .epochs(1)
            .layerSize(100)
            .lookupTable(table)
            .stopWords(new ArrayList<String>())
            .useAdaGrad(false)
            .negativeSample(5)
            .vocabCache(cache)
            .seed(42)
            .windowSize(5)
            .iterate(iter)
            .tokenizerFactory(t)
            .build();

    assertEquals(new ArrayList<String>(), vec.getStopWords());
    vec.fit();

    INDArray day1 = vec.getWordVectorMatrix("day");

    // Close the stream before the file is read back: this releases the
    // handle and ensures nothing is left unflushed when loadTxtVectors runs.
    try (FileOutputStream out = new FileOutputStream(file)) {
        WordVectorSerializer.writeWordVectors(vec, out);
    }

    WordVectors vec2 = WordVectorSerializer.loadTxtVectors(file);
    INDArray day2 = vec2.getWordVectorMatrix("day");
    assertEquals(day1, day2);

    File tempFile = File.createTempFile("tetsts", "Fdfs");
    tempFile.deleteOnExit();
    WordVectorSerializer.writeWord2VecModel(vec, tempFile);
    // Result intentionally unused: this call only verifies that the model
    // written above can be read back without an exception.
    WordVectorSerializer.readWord2VecModel(tempFile);
}
Use of org.deeplearning4j.models.embeddings.wordvectors.WordVectors in project deeplearning4j by deeplearning4j.
Class WordVectorSerializerTest, method testLoader.
/**
 * Manual check, disabled via {@code @Ignore}: loads text vectors from a
 * fixed absolute path and logs the vector returned for the word "rewinding".
 *
 * @throws Exception if loading the vectors fails
 */
@Test
@Ignore
public void testLoader() throws Exception {
    File vectorsFile = new File("/home/raver119/Downloads/_vectors.txt");
    WordVectors loaded = WordVectorSerializer.loadTxtVectors(vectorsFile);

    String rendered = Arrays.toString(loaded.getWordVector("rewinding"));
    logger.info("Rewinding: " + rendered);
}
Use of org.deeplearning4j.models.embeddings.wordvectors.WordVectors in project deeplearning4j by deeplearning4j.
Class Word2VecTests, method testLoadingWordVectors.
/**
 * Loads text-format word vectors from the file at {@code pathToWriteto} and
 * prints the 10 words reported as nearest to "day".
 *
 * <p>If that file does not exist yet, {@code testRunWord2Vec()} is invoked
 * first (presumably it produces the file; confirm against that method).
 *
 * @throws Exception if the preparatory run or loading the vectors fails
 */
@Test
public void testLoadingWordVectors() throws Exception {
    File savedModel = new File(pathToWriteto);
    boolean modelMissing = !savedModel.exists();
    if (modelMissing) {
        testRunWord2Vec();
    }

    WordVectors loaded = WordVectorSerializer.loadTxtVectors(savedModel);
    Collection<String> nearest = loaded.wordsNearest("day", 10);
    Object[] nearestWords = nearest.toArray();
    System.out.println(Arrays.toString(nearestWords));
}
Aggregations