Use of org.deeplearning4j.spark.models.embeddings.word2vec.Word2Vec in project deeplearning4j by deeplearning4j.
The class TextPipelineTest, method before.
/**
 * Builds the fixtures shared by the tests: a local Spark configuration, two
 * Word2Vec instances that differ only in whether a stop-word list is supplied,
 * and a small list of sample sentences.
 *
 * @throws Exception if any of the fixtures cannot be created
 */
@Before
public void before() throws Exception {
    // Class names handed to both builders as plain strings.
    final String tokenizerFactoryName =
            "org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory";
    final String preprocessorName =
            "org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor";

    conf = new SparkConf()
            .setMaster("local[4]")
            .setAppName("sparktest");

    // Every available option is spelled out explicitly; the original author
    // notes that these values are the defaults.
    word2vec = new Word2Vec.Builder()
            .minWordFrequency(1)
            .setNGrams(1)
            .tokenizerFactory(tokenizerFactoryName)
            .tokenPreprocessor(preprocessorName)
            .stopWords(StopWords.getStopWords())
            .seed(42L)
            .negative(0)
            .useAdaGrad(false)
            .layerSize(100)
            .windowSize(5)
            .learningRate(0.025)
            .minLearningRate(0.0001)
            .iterations(1)
            .build();

    // Same settings as above, except that no stop-word list is passed in.
    word2vecNoStop = new Word2Vec.Builder()
            .minWordFrequency(1)
            .setNGrams(1)
            .tokenizerFactory(tokenizerFactoryName)
            .tokenPreprocessor(preprocessorName)
            .seed(42L)
            .negative(0)
            .useAdaGrad(false)
            .layerSize(100)
            .windowSize(5)
            .learningRate(0.025)
            .minLearningRate(0.0001)
            .iterations(1)
            .build();

    sentenceList = Arrays.asList(
            "This is a strange strange world.",
            "Flowers are red.");
}