Search in sources :

Example 1 with LdaTrainBatchOp

Use of com.alibaba.alink.operator.batch.clustering.LdaTrainBatchOp in project Alink by Alibaba.

From the class Chap21, method c_7.

/**
 * Runs the LDA example end to end: segments the text column and removes stop words,
 * trains an LDA model (only when the cached output files are not both on disk), predicts
 * with the stored model, evaluates the prediction against the label column, and finally
 * prints the highest-weighted words of every topic from the stored word-topic table.
 *
 * @throws Exception if any of the linked batch operators or the batch execution fails
 */
private static void c_7() throws Exception {
    // Single source of truth for the topic count: used for training and for the
    // per-topic column names ("topic_0" .. "topic_9") read back below.
    final int topicNum = 10;

    BatchOperator<?> docs = getSource()
        .select(LABEL_COL_NAME + ", " + TXT_COL_NAME)
        .link(new SegmentBatchOp().setSelectedCol(TXT_COL_NAME))
        .link(new StopWordsRemoverBatchOp().setSelectedCol(TXT_COL_NAME));
    docs.lazyPrint(10);

    // Both files are read unconditionally further down, so the cached result is only
    // usable when BOTH exist; checking the model file alone would skip training and
    // then fail on a missing word-topic file.
    boolean modelCached = new File(DATA_DIR + LDA_MODEL_FILE).exists();
    boolean pwzCached = new File(DATA_DIR + LDA_PWZ_FILE).exists();
    if (!modelCached || !pwzCached) {
        LdaTrainBatchOp lda = new LdaTrainBatchOp()
            .setTopicNum(topicNum)
            .setNumIter(200)
            .setVocabSize(20000)
            .setSelectedCol(TXT_COL_NAME)
            .setRandomSeed(123);
        docs.link(lda);
        lda.lazyPrintModelInfo();
        // Overwrite so that a leftover file from a partial earlier run cannot block
        // the retrain from writing a consistent model / word-topic pair.
        lda.link(
            new AkSinkBatchOp()
                .setFilePath(DATA_DIR + LDA_MODEL_FILE)
                .setOverwriteSink(true));
        lda.getSideOutput(0).link(
            new AkSinkBatchOp()
                .setFilePath(DATA_DIR + LDA_PWZ_FILE)
                .setOverwriteSink(true));
        BatchOperator.execute();
    }

    // Predict with the model loaded from disk, then score the predicted topic against
    // the label column as a clustering result.
    new LdaPredictBatchOp()
        .setSelectedCol(TXT_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol("predinfo")
        .linkFrom(new AkSourceBatchOp().setFilePath(DATA_DIR + LDA_MODEL_FILE), docs)
        .lazyPrint(5)
        .link(
            new EvalClusterBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics());

    AkSourceBatchOp pwz = new AkSourceBatchOp().setFilePath(DATA_DIR + LDA_PWZ_FILE);
    pwz.sample(0.001).lazyPrint(10);

    // For each topic, print the 20 rows with the largest value in that topic's column.
    for (int t = 0; t < topicNum; t++) {
        String topicCol = "topic_" + t;
        pwz.select("word, " + topicCol)
            .orderBy(topicCol, 20, false)
            .lazyPrint(-1, "topic" + t);
    }
    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) StopWordsRemoverBatchOp(com.alibaba.alink.operator.batch.nlp.StopWordsRemoverBatchOp) SegmentBatchOp(com.alibaba.alink.operator.batch.nlp.SegmentBatchOp) LdaTrainBatchOp(com.alibaba.alink.operator.batch.clustering.LdaTrainBatchOp) LdaPredictBatchOp(com.alibaba.alink.operator.batch.clustering.LdaPredictBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Aggregations

LdaPredictBatchOp (com.alibaba.alink.operator.batch.clustering.LdaPredictBatchOp)1 LdaTrainBatchOp (com.alibaba.alink.operator.batch.clustering.LdaTrainBatchOp)1 EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)1 SegmentBatchOp (com.alibaba.alink.operator.batch.nlp.SegmentBatchOp)1 StopWordsRemoverBatchOp (com.alibaba.alink.operator.batch.nlp.StopWordsRemoverBatchOp)1 AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp)1 AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp)1 File (java.io.File)1