use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.
the class Chap19 method c_3.
/**
 * Trains a PCA model (k = 39, covariance-based) on the sparse training data, writes the
 * model through an {@code AkSinkBatchOp}, and then runs the same KMeans estimator (k = 10)
 * twice: once on the original vectors and once on the PCA prediction output. For each run
 * the cluster evaluation metrics are lazily printed and the stopwatch reading is written
 * to standard output.
 *
 * @throws Exception propagated from the Alink operators or from job execution
 */
static void c_3() throws Exception {
    final String pcaModelPath = DATA_DIR + PCA_MODEL_FILE;

    AkSourceBatchOp source = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);

    // Step 1: train PCA and sink the resulting model to a file.
    BatchOperator<?> pcaModel = source.link(
        new PcaTrainBatchOp()
            .setK(39)
            .setCalculationType(CalculationType.COV)
            .setVectorCol(VECTOR_COL_NAME)
            .lazyPrintModelInfo()
    );
    pcaModel.link(
        new AkSinkBatchOp()
            .setFilePath(pcaModelPath)
            .setOverwriteSink(true)
    );
    BatchOperator.execute();

    // Step 2: apply the stored PCA model; the prediction overwrites the vector column.
    BatchOperator<?> pcaProjected = new PcaPredictBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(VECTOR_COL_NAME)
        .linkFrom(new AkSourceBatchOp().setFilePath(pcaModelPath), source);

    Stopwatch timer = new Stopwatch();

    KMeans clusterer = new KMeans()
        .setK(10)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);

    // Step 3: KMeans on the original vectors, timed.
    timer.reset();
    timer.start();
    BatchOperator<?> rawClusters = clusterer.fit(source).transform(source);
    rawClusters.link(
        new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("KMeans")
    );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Step 4: KMeans on the PCA output, timed the same way for comparison.
    timer.reset();
    timer.start();
    BatchOperator<?> pcaClusters = clusterer.fit(pcaProjected).transform(pcaProjected);
    pcaClusters.link(
        new EvalClusterBatchOp()
            .setVectorCol(VECTOR_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .lazyPrintMetrics("KMeans + PCA")
    );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.
the class Chap21 method c_6_2.
/**
 * Runs a series of nearest-neighbor searches over the title column.
 *
 * <ol>
 *   <li>For each exact metric and each approximate metric, fits a string-based and a
 *       segmented-text-based nearest-neighbor model on the source data and lazily prints
 *       the top-5 similar titles for four in-memory query rows.</li>
 *   <li>Fits and saves a LEVENSHTEIN pipeline and a JACCARD_SIM pipeline, each only when
 *       its model file does not exist yet, printing the stopwatch reading of each fit.</li>
 *   <li>Loads the saved models and times batch predictions on the rows filtered by
 *       {@code category_name}.</li>
 *   <li>Repeats prediction in stream mode, on the in-memory queries and on a 2% sample
 *       of the filtered stream source.</li>
 * </ol>
 *
 * @throws Exception propagated from the Alink operators or from job execution
 */
private static void c_6_2() throws Exception {
    BatchOperator.setParallelism(2);

    final String[] exactMetrics = { "LEVENSHTEIN", "LCS", "SSK", "COSINE" };
    final String[] approxMetrics = { "JACCARD_SIM", "MINHASH_JACCARD_SIM", "SIMHASH_HAMMING_SIM" };
    final String snnModelPath = DATA_DIR + SNN_MODEL_FILE;
    final String approxSnnModelPath = DATA_DIR + APPROX_SNN_MODEL_FILE;

    Row[] rows = new Row[] {
        Row.of("林徽因什么理由拒绝了徐志摩而选择梁思成为终身伴侣"),
        Row.of("发酵床的垫料种类有哪些?哪种更好?"),
        Row.of("京城最值得你来场文化之旅的博物馆"),
        Row.of("什么是超写实绘画?")
    };
    MemSourceBatchOp target = new MemSourceBatchOp(rows, new String[] { TXT_COL_NAME });
    BatchOperator<?> source = getSource();

    // Exact metrics on the raw title strings.
    for (String metric : exactMetrics) {
        BatchOperator<?> neighbors = new StringNearestNeighbor()
            .setMetric(metric)
            .setSelectedCol(TXT_COL_NAME)
            .setIdCol(TXT_COL_NAME)
            .setTopN(5)
            .setOutputCol("similar_titles")
            .fit(source)
            .transform(target);
        neighbors.lazyPrint(-1, "StringNeareastNeighbor + " + metric);
        BatchOperator.execute();
    }

    // Exact metrics on segmented titles.
    for (String metric : exactMetrics) {
        BatchOperator<?> neighbors = new Pipeline()
            .add(
                new Segment()
                    .setSelectedCol(TXT_COL_NAME)
                    .setOutputCol("segmented_title")
            )
            .add(
                new TextNearestNeighbor()
                    .setMetric(metric)
                    .setSelectedCol("segmented_title")
                    .setIdCol(TXT_COL_NAME)
                    .setTopN(5)
                    .setOutputCol("similar_titles")
            )
            .fit(source)
            .transform(target);
        neighbors.lazyPrint(-1, "TextNeareastNeighbor + " + metric);
        BatchOperator.execute();
    }

    // Approximate metrics on the raw title strings.
    for (String metric : approxMetrics) {
        BatchOperator<?> neighbors = new StringApproxNearestNeighbor()
            .setMetric(metric)
            .setSelectedCol(TXT_COL_NAME)
            .setIdCol(TXT_COL_NAME)
            .setTopN(5)
            .setOutputCol("similar_titles")
            .fit(source)
            .transform(target);
        neighbors.lazyPrint(-1, "StringApproxNeareastNeighbor + " + metric);
        BatchOperator.execute();
    }

    // Approximate metrics on segmented titles.
    for (String metric : approxMetrics) {
        BatchOperator<?> neighbors = new Pipeline()
            .add(
                new Segment()
                    .setSelectedCol(TXT_COL_NAME)
                    .setOutputCol("segmented_title")
            )
            .add(
                new TextApproxNearestNeighbor()
                    .setMetric(metric)
                    .setSelectedCol("segmented_title")
                    .setIdCol(TXT_COL_NAME)
                    .setTopN(5)
                    .setOutputCol("similar_titles")
            )
            .fit(source)
            .transform(target);
        neighbors.lazyPrint(-1, "TextApproxNeareastNeighbor + " + metric);
        BatchOperator.execute();
    }

    Pipeline exactPipeline = new Pipeline().add(
        new StringNearestNeighbor()
            .setMetric("LEVENSHTEIN")
            .setSelectedCol(TXT_COL_NAME)
            .setIdCol(TXT_COL_NAME)
            .setTopN(5)
            .setOutputCol("similar_titles")
    );
    Pipeline approxPipeline = new Pipeline().add(
        new StringApproxNearestNeighbor()
            .setMetric("JACCARD_SIM")
            .setSelectedCol(TXT_COL_NAME)
            .setIdCol(TXT_COL_NAME)
            .setTopN(5)
            .setOutputCol("similar_titles")
    );

    Stopwatch timer = new Stopwatch();

    // Fit and persist each model only if its file is not already on disk.
    if (!new File(snnModelPath).exists()) {
        timer.reset();
        timer.start();
        exactPipeline.fit(source).save(snnModelPath);
        BatchOperator.execute();
        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }

    if (!new File(approxSnnModelPath).exists()) {
        timer.reset();
        timer.start();
        approxPipeline.fit(source).save(approxSnnModelPath);
        BatchOperator.execute();
        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }

    BatchOperator<?> stockTitles = source.filter("category_name = 'stock'");
    BatchOperator<?> newsStoryTitles = source.filter("category_name = 'news_story'");

    // Timed batch prediction: LEVENSHTEIN model on the 'stock' rows.
    timer.reset();
    timer.start();
    PipelineModel.load(snnModelPath)
        .transform(stockTitles)
        .lazyPrint(10, "StringNeareastNeighbor + LEVENSHTEIN");
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Timed batch prediction: JACCARD_SIM model on the 'stock' rows.
    timer.reset();
    timer.start();
    PipelineModel.load(approxSnnModelPath)
        .transform(stockTitles)
        .lazyPrint(10, "JACCARD_SIM + stock");
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Timed batch prediction: JACCARD_SIM model on the 'news_story' rows.
    timer.reset();
    timer.start();
    PipelineModel.load(approxSnnModelPath)
        .transform(newsStoryTitles)
        .lazyPrint(10, "JACCARD_SIM + news_story");
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Stream mode: the same in-memory queries against the LEVENSHTEIN model (not timed).
    StreamOperator.setParallelism(1);
    StreamOperator<?> streamQueries = new MemSourceStreamOp(rows, new String[] { TXT_COL_NAME });
    PipelineModel.load(snnModelPath).transform(streamQueries).print();
    StreamOperator.execute();

    // Stream mode: JACCARD_SIM model on the 'stock' stream, printing a 2% sample, timed.
    StreamOperator<?> streamStockTitles = getStreamSource().filter("category_name = 'stock'");
    timer.reset();
    timer.start();
    PipelineModel.load(approxSnnModelPath)
        .transform(streamStockTitles)
        .sample(0.02)
        .print();
    StreamOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.
the class Chap25 method c_3.
/**
 * Loads the Chap16 train and test sets, passes them to {@code linearReg} and then
 * {@code dnnReg}, and prints the stopwatch reading for the whole sequence.
 * Process-info printing is switched on through {@code AlinkGlobalConfiguration} first.
 *
 * @throws Exception propagated from the invoked experiments
 */
static void c_3() throws Exception {
    Stopwatch timer = new Stopwatch();
    timer.start();

    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    final String trainPath = Chap16.DATA_DIR + Chap16.TRAIN_FILE;
    final String testPath = Chap16.DATA_DIR + Chap16.TEST_FILE;

    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(trainPath);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(testPath);

    linearReg(trainSet, testSet);
    dnnReg(trainSet, testSet);

    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.
the class Chap25 method c_2.
/**
 * Loads the Chap13 dense train and test sets, passes them to {@code softmax},
 * {@code dnn} and {@code cnn} in that order, and prints the stopwatch reading for the
 * whole sequence. Process-info printing is switched on through
 * {@code AlinkGlobalConfiguration} first.
 *
 * @throws Exception propagated from the invoked experiments
 */
static void c_2() throws Exception {
    Stopwatch timer = new Stopwatch();
    timer.start();

    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    final String trainPath = Chap13.DATA_DIR + Chap13.DENSE_TRAIN_FILE;
    final String testPath = Chap13.DATA_DIR + Chap13.DENSE_TEST_FILE;

    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(trainPath);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(testPath);

    softmax(trainSet, testSet);
    dnn(trainSet, testSet);
    cnn(trainSet, testSet);

    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
use of com.alibaba.alink.common.utils.Stopwatch in project Alink by alibaba.
the class Chap05 method main.
/**
 * Entry point: sets batch parallelism to 1, runs the chapter's example methods in
 * order, and prints the stopwatch reading for the whole run.
 *
 * @param args command-line arguments (not used)
 * @throws Exception propagated from any of the example methods
 */
public static void main(String[] args) throws Exception {
    Stopwatch timer = new Stopwatch();
    timer.start();

    BatchOperator.setParallelism(1);

    // Run the examples in chapter order.
    c_1();
    c_2_1();
    c_2_2();
    c_2_3();
    c_3();

    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Aggregations