Search in sources :

Example 1 with Stopwatch

use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.

the class Chap19 method c_3.

/**
 * Clusters the sparse training data with KMeans twice: once on the vectors as
 * loaded and once on the output of a PCA prediction step. For each run the
 * cluster evaluation metrics are lazily printed and the elapsed time is
 * written to standard output.
 *
 * @throws Exception propagated from the Alink operators and job execution
 */
static void c_3() throws Exception {
    AkSourceBatchOp rawData = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);

    // Train a PCA model (k = 39, COV calculation type) and write it to the
    // model file, overwriting whatever is already there.
    rawData
        .link(
            new PcaTrainBatchOp()
                .setK(39)
                .setCalculationType(CalculationType.COV)
                .setVectorCol(VECTOR_COL_NAME)
                .lazyPrintModelInfo())
        .link(
            new AkSinkBatchOp()
                .setFilePath(DATA_DIR + PCA_MODEL_FILE)
                .setOverwriteSink(true));
    BatchOperator.execute();

    // Apply the saved PCA model; the prediction is written back into the
    // vector column, so the same KMeans configuration works on both inputs.
    PcaPredictBatchOp pcaPredictor = new PcaPredictBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(VECTOR_COL_NAME);
    BatchOperator<?> reducedData = pcaPredictor.linkFrom(
        new AkSourceBatchOp().setFilePath(DATA_DIR + PCA_MODEL_FILE),
        rawData);

    Stopwatch timer = new Stopwatch();
    KMeans clusterer = new KMeans()
        .setK(10)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);

    // Run 1: KMeans directly on the loaded vectors.
    timer.reset();
    timer.start();
    BatchOperator<?> rawClustered = clusterer.fit(rawData).transform(rawData);
    rawClustered.link(
        new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("KMeans"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Run 2: KMeans on the PCA output.
    timer.reset();
    timer.start();
    BatchOperator<?> reducedClustered = clusterer.fit(reducedData).transform(reducedData);
    reducedClustered.link(
        new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("KMeans + PCA"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) KMeans(com.alibaba.alink.pipeline.clustering.KMeans) PcaPredictBatchOp(com.alibaba.alink.operator.batch.feature.PcaPredictBatchOp) PcaTrainBatchOp(com.alibaba.alink.operator.batch.feature.PcaTrainBatchOp) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 2 with Stopwatch

use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.

the class Chap21 method c_6_2.

/**
 * Exercises the string/text nearest-neighbor components on a small set of
 * query titles, then times model saving and batch/stream prediction for the
 * exact (LEVENSHTEIN) and approximate (JACCARD_SIM) string pipelines.
 *
 * <p>Each section is followed by its own execute() call so that the lazily
 * registered prints are flushed before the next section starts.
 *
 * @throws Exception propagated from the Alink operators and job execution
 */
private static void c_6_2() throws Exception {
    BatchOperator.setParallelism(2);

    final String similarCol = "similar_titles";
    final String segmentedCol = "segmented_title";
    final String[] exactMetrics = { "LEVENSHTEIN", "LCS", "SSK", "COSINE" };
    final String[] approxMetrics = { "JACCARD_SIM", "MINHASH_JACCARD_SIM", "SIMHASH_HAMMING_SIM" };

    Row[] queryRows = {
        Row.of("林徽因什么理由拒绝了徐志摩而选择梁思成为终身伴侣"),
        Row.of("发酵床的垫料种类有哪些?哪种更好?"),
        Row.of("京城最值得你来场文化之旅的博物馆"),
        Row.of("什么是超写实绘画?")
    };
    MemSourceBatchOp queries = new MemSourceBatchOp(queryRows, new String[] { TXT_COL_NAME });
    BatchOperator<?> corpus = getSource();

    // Exact string matching, one job per metric.
    for (String metric : exactMetrics) {
        new StringNearestNeighbor()
            .setMetric(metric)
            .setSelectedCol(TXT_COL_NAME)
            .setIdCol(TXT_COL_NAME)
            .setTopN(5)
            .setOutputCol(similarCol)
            .fit(corpus)
            .transform(queries)
            .lazyPrint(-1, "StringNeareastNeighbor + " + metric);
        BatchOperator.execute();
    }

    // Exact text matching on segmented titles, one job per metric.
    for (String metric : exactMetrics) {
        Pipeline segmentThenMatch = new Pipeline()
            .add(
                new Segment()
                    .setSelectedCol(TXT_COL_NAME)
                    .setOutputCol(segmentedCol))
            .add(
                new TextNearestNeighbor()
                    .setMetric(metric)
                    .setSelectedCol(segmentedCol)
                    .setIdCol(TXT_COL_NAME)
                    .setTopN(5)
                    .setOutputCol(similarCol));
        segmentThenMatch
            .fit(corpus)
            .transform(queries)
            .lazyPrint(-1, "TextNeareastNeighbor + " + metric);
        BatchOperator.execute();
    }

    // Approximate string matching, one job per metric.
    for (String metric : approxMetrics) {
        new StringApproxNearestNeighbor()
            .setMetric(metric)
            .setSelectedCol(TXT_COL_NAME)
            .setIdCol(TXT_COL_NAME)
            .setTopN(5)
            .setOutputCol(similarCol)
            .fit(corpus)
            .transform(queries)
            .lazyPrint(-1, "StringApproxNeareastNeighbor + " + metric);
        BatchOperator.execute();
    }

    // Approximate text matching on segmented titles, one job per metric.
    for (String metric : approxMetrics) {
        Pipeline segmentThenApproxMatch = new Pipeline()
            .add(
                new Segment()
                    .setSelectedCol(TXT_COL_NAME)
                    .setOutputCol(segmentedCol))
            .add(
                new TextApproxNearestNeighbor()
                    .setMetric(metric)
                    .setSelectedCol(segmentedCol)
                    .setIdCol(TXT_COL_NAME)
                    .setTopN(5)
                    .setOutputCol(similarCol));
        segmentThenApproxMatch
            .fit(corpus)
            .transform(queries)
            .lazyPrint(-1, "TextApproxNeareastNeighbor + " + metric);
        BatchOperator.execute();
    }

    Pipeline exactPipeline = new Pipeline()
        .add(
            new StringNearestNeighbor()
                .setMetric("LEVENSHTEIN")
                .setSelectedCol(TXT_COL_NAME)
                .setIdCol(TXT_COL_NAME)
                .setTopN(5)
                .setOutputCol(similarCol));
    Pipeline approxPipeline = new Pipeline()
        .add(
            new StringApproxNearestNeighbor()
                .setMetric("JACCARD_SIM")
                .setSelectedCol(TXT_COL_NAME)
                .setIdCol(TXT_COL_NAME)
                .setTopN(5)
                .setOutputCol(similarCol));

    final String exactModelPath = DATA_DIR + SNN_MODEL_FILE;
    final String approxModelPath = DATA_DIR + APPROX_SNN_MODEL_FILE;
    Stopwatch timer = new Stopwatch();

    // Fit and save each model only when its file is not already on disk,
    // timing the fit-and-save job.
    if (!new File(exactModelPath).exists()) {
        timer.reset();
        timer.start();
        exactPipeline.fit(corpus).save(exactModelPath);
        BatchOperator.execute();
        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }
    if (!new File(approxModelPath).exists()) {
        timer.reset();
        timer.start();
        approxPipeline.fit(corpus).save(approxModelPath);
        BatchOperator.execute();
        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }

    BatchOperator<?> stockTitles = corpus.filter("category_name = 'stock'");
    BatchOperator<?> newsStoryTitles = corpus.filter("category_name = 'news_story'");

    // Timed batch prediction: exact model on the stock subset.
    timer.reset();
    timer.start();
    PipelineModel
        .load(exactModelPath)
        .transform(stockTitles)
        .lazyPrint(10, "StringNeareastNeighbor + LEVENSHTEIN");
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Timed batch prediction: approximate model on the stock subset.
    timer.reset();
    timer.start();
    PipelineModel
        .load(approxModelPath)
        .transform(stockTitles)
        .lazyPrint(10, "JACCARD_SIM + stock");
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Timed batch prediction: approximate model on the news_story subset.
    timer.reset();
    timer.start();
    PipelineModel
        .load(approxModelPath)
        .transform(newsStoryTitles)
        .lazyPrint(10, "JACCARD_SIM + news_story");
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Stream prediction with the exact model on the in-memory query rows (not timed).
    StreamOperator.setParallelism(1);
    StreamOperator<?> streamQueries = new MemSourceStreamOp(queryRows, new String[] { TXT_COL_NAME });
    PipelineModel.load(exactModelPath).transform(streamQueries).print();
    StreamOperator.execute();

    // Timed stream prediction with the approximate model on the stock subset,
    // printing only a 0.02 sample of the output.
    StreamOperator<?> streamStockTitles = getStreamSource().filter("category_name = 'stock'");
    timer.reset();
    timer.start();
    PipelineModel
        .load(approxModelPath)
        .transform(streamStockTitles)
        .sample(0.02)
        .print();
    StreamOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) TextApproxNearestNeighbor(com.alibaba.alink.pipeline.similarity.TextApproxNearestNeighbor) StringApproxNearestNeighbor(com.alibaba.alink.pipeline.similarity.StringApproxNearestNeighbor) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) Segment(com.alibaba.alink.pipeline.nlp.Segment) Pipeline(com.alibaba.alink.pipeline.Pipeline) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) Row(org.apache.flink.types.Row) TextNearestNeighbor(com.alibaba.alink.pipeline.similarity.TextNearestNeighbor) File(java.io.File) StringNearestNeighbor(com.alibaba.alink.pipeline.similarity.StringNearestNeighbor)

Example 3 with Stopwatch

use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.

the class Chap25 method c_3.

/**
 * Loads the Chap16 train and test files, passes them to {@code linearReg} and
 * then {@code dnnReg}, and prints the total elapsed time.
 *
 * @throws Exception propagated from the invoked examples
 */
static void c_3() throws Exception {
    Stopwatch timer = new Stopwatch();
    timer.start();

    // Turn on Alink's process-info printing before any operator is created.
    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    AkSourceBatchOp trainData = new AkSourceBatchOp()
        .setFilePath(Chap16.DATA_DIR + Chap16.TRAIN_FILE);
    AkSourceBatchOp testData = new AkSourceBatchOp()
        .setFilePath(Chap16.DATA_DIR + Chap16.TEST_FILE);

    linearReg(trainData, testData);
    dnnReg(trainData, testData);

    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Stopwatch(com.alibaba.alink.common.utils.Stopwatch)

Example 4 with Stopwatch

use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.

the class Chap25 method c_2.

/**
 * Loads the Chap13 dense train and test files, passes them to {@code softmax},
 * {@code dnn} and {@code cnn} in that order, and prints the total elapsed time.
 *
 * @throws Exception propagated from the invoked examples
 */
static void c_2() throws Exception {
    Stopwatch timer = new Stopwatch();
    timer.start();

    // Turn on Alink's process-info printing before any operator is created.
    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    AkSourceBatchOp trainData = new AkSourceBatchOp()
        .setFilePath(Chap13.DATA_DIR + Chap13.DENSE_TRAIN_FILE);
    AkSourceBatchOp testData = new AkSourceBatchOp()
        .setFilePath(Chap13.DATA_DIR + Chap13.DENSE_TEST_FILE);

    softmax(trainData, testData);
    dnn(trainData, testData);
    cnn(trainData, testData);

    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) Stopwatch(com.alibaba.alink.common.utils.Stopwatch)

Example 5 with Stopwatch

use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.

the class Chap05 method main.

/**
 * Entry point: sets the batch parallelism to 1, runs the chapter's examples
 * in order, and prints the total elapsed time.
 *
 * @param args command-line arguments (not used)
 * @throws Exception propagated from the invoked examples
 */
public static void main(String[] args) throws Exception {
    Stopwatch timer = new Stopwatch();
    timer.start();

    BatchOperator.setParallelism(1);

    // Run the examples sequentially.
    c_1();
    c_2_1();
    c_2_2();
    c_2_3();
    c_3();

    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : Stopwatch(com.alibaba.alink.common.utils.Stopwatch)

Aggregations

Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 10; AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 8; EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp): 3; Pipeline (com.alibaba.alink.pipeline.Pipeline): 3; KMeans (com.alibaba.alink.pipeline.clustering.KMeans): 3; EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp): 2; AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 2; File (java.io.File): 2; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 1; EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 1; PcaPredictBatchOp (com.alibaba.alink.operator.batch.feature.PcaPredictBatchOp): 1; PcaTrainBatchOp (com.alibaba.alink.operator.batch.feature.PcaTrainBatchOp): 1; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 1; MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp): 1; DistanceType (com.alibaba.alink.params.shared.clustering.HasKMeansDistanceType.DistanceType): 1; TreeType (com.alibaba.alink.params.shared.tree.HasIndividualTreeType.TreeType): 1; DecisionTreeClassifier (com.alibaba.alink.pipeline.classification.DecisionTreeClassifier): 1; GbdtClassifier (com.alibaba.alink.pipeline.classification.GbdtClassifier): 1; KnnClassifier (com.alibaba.alink.pipeline.classification.KnnClassifier): 1; RandomForestClassifier (com.alibaba.alink.pipeline.classification.RandomForestClassifier): 1