Use of org.deeplearning4j.models.embeddings.loader.VectorsConfiguration in the project deeplearning4j by deeplearning4j.
From the class SequenceVectorsTest, method testGlove1.
/**
 * Fits a {@code SequenceVectors} model that uses GloVe as its elements learning algorithm on
 * the {@code big/raw_sentences.txt} classpath resource, logs a handful of pairwise word
 * similarities plus the ten nearest words to "day", and finally asserts that the day/night
 * similarity is greater than 0.6.
 *
 * <p>The test is currently disabled through {@code @Ignore}.
 *
 * @throws Exception if any step of the setup or training throws
 */
@Ignore
@Test
public void testGlove1() throws Exception {
    logger.info("Max available memory: " + Runtime.getRuntime().maxMemory());

    // Corpus: a classpath text file wrapped in a BasicLineIterator.
    File corpusFile = new ClassPathResource("big/raw_sentences.txt").getFile();
    BasicLineIterator lineIterator = new BasicLineIterator(corpusFile);

    // Tokenizer with the common pre-processor attached.
    TokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();
    tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());

    // Sentences -> sequences of VocabWord.
    SentenceTransformer sentenceTransformer = new SentenceTransformer.Builder()
            .iterator(lineIterator)
            .tokenizerFactory(tokenizerFactory)
            .build();
    AbstractSequenceIterator<VocabWord> sequences =
            new AbstractSequenceIterator.Builder<>(sentenceTransformer).build();

    // NOTE(review): the configuration learning rate (0.06) differs from the one passed to the
    // GloVe builder below (0.05); which of the two takes effect is not visible here - confirm.
    VectorsConfiguration vectorsConfiguration = new VectorsConfiguration();
    vectorsConfiguration.setWindow(5);
    vectorsConfiguration.setLearningRate(0.06);
    vectorsConfiguration.setLayersSize(100);

    // Only element (word) representations are trained; sequence representations are off.
    SequenceVectors<VocabWord> model = new SequenceVectors.Builder<VocabWord>(vectorsConfiguration)
            .iterate(sequences)
            .iterations(1)
            .epochs(45)
            .elementsLearningAlgorithm(
                    new GloVe.Builder<VocabWord>()
                            .shuffle(true)
                            .symmetric(true)
                            .learningRate(0.05)
                            .alpha(0.75)
                            .xMax(100.0)
                            .build())
            .resetModel(true)
            .trainElementsRepresentation(true)
            .trainSequencesRepresentation(false)
            .build();
    model.fit();

    // Informational output only; none of these values is asserted on.
    double dayNight = model.similarity("day", "night");
    logger.info("Day/night similarity: " + dayNight);

    double dayAnother = model.similarity("day", "another");
    logger.info("Day/another similarity: " + dayAnother);

    double nightYear = model.similarity("night", "year");
    logger.info("Night/year similarity: " + nightYear);

    double nightMe = model.similarity("night", "me");
    logger.info("Night/me similarity: " + nightMe);

    double dayKnow = model.similarity("day", "know");
    logger.info("Day/know similarity: " + dayKnow);

    double bestPolice = model.similarity("best", "police");
    logger.info("Best/police similarity: " + bestPolice);

    Collection<String> nearestToDay = model.wordsNearest("day", 10);
    logger.info("Nearest labels to 'day': " + nearestToDay);

    // The only actual check: day/night similarity is queried again and must exceed 0.6.
    double checkedSimilarity = model.similarity("day", "night");
    assertTrue(checkedSimilarity > 0.6d);
}
Aggregations