Use of com.alibaba.alink.pipeline.feature.PCA in the Alink project by Alibaba.
Class Chap19, method c_2.
/**
 * Builds a two-stage pipeline (StandardScaler followed by PCA) over the crime columns,
 * applies it to the in-memory crime data, expands the resulting vector column into
 * four named double columns and lazily prints the result.
 *
 * @throws Exception declared by the method signature; raised by the calls in the body
 */
static void c_2() throws Exception {
    // The same seven columns feed both the scaler and the PCA stage.
    String[] crimeCols = {"murder", "rape", "robbery", "assault", "burglary", "larceny", "auto"};

    MemSourceBatchOp source = new MemSourceBatchOp(CRIME_ROWS_DATA, CRIME_COL_NAMES);

    Pipeline scaledPca = new Pipeline()
        .add(
            new StandardScaler()
                .setSelectedCols(crimeCols)
        )
        .add(
            new PCA()
                .setCalculationType(CalculationType.COV)
                .setK(4)
                .setSelectedCols(crimeCols)
                .setPredictionCol(VECTOR_COL_NAME)
                .enableLazyPrintModelInfo()
        );

    // The pipeline is fitted on and applied to the same source.
    BatchOperator<?> projected = scaledPca.fit(source).transform(source);

    // Split the prediction vector into prin1..prin4, keeping only the "state" column alongside.
    VectorToColumnsBatchOp toColumns = new VectorToColumnsBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setSchemaStr("prin1 double, prin2 double, prin3 double, prin4 double")
        .setReservedCols("state");

    projected
        .link(toColumns)
        .lazyPrint(10, "state with principle components");

    // Lazy outputs registered above are produced when the batch job is executed.
    BatchOperator.execute();
}
Use of com.alibaba.alink.pipeline.feature.PCA in the Alink project by Alibaba.
Class Chap19, method c_4.
/**
 * Runs six KNN classification experiments and, for each one, registers a lazy
 * multi-class evaluation report, executes the batch job and prints the stopwatch's
 * elapsed time span.
 *
 * <p>Experiments, in order:
 * <ol>
 *   <li>plain KnnClassifier on the dense data, then on the sparse data;</li>
 *   <li>PCA (k = 39, COV) followed by KnnClassifier, dense then sparse;</li>
 *   <li>a PCAModel whose model data is read from {@code DATA_DIR + PCA_MODEL_FILE},
 *       followed by KnnClassifier, dense then sparse.</li>
 * </ol>
 *
 * @throws Exception declared by the method signature; raised by the calls in the body
 */
static void c_4() throws Exception {
    AkSourceBatchOp denseTrain = new AkSourceBatchOp().setFilePath(DATA_DIR + DENSE_TRAIN_FILE);
    AkSourceBatchOp denseTest = new AkSourceBatchOp().setFilePath(DATA_DIR + DENSE_TEST_FILE);
    AkSourceBatchOp sparseTrain = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceBatchOp sparseTest = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE);

    // Index 0 is the dense split, index 1 the sparse split.
    AkSourceBatchOp[] trainSets = {denseTrain, sparseTrain};
    AkSourceBatchOp[] testSets = {denseTest, sparseTest};

    Stopwatch timer = new Stopwatch();

    // --- Experiment group 1: KNN directly on the vector column. ---
    String[] knnTitles = {"KnnClassifier Dense", "KnnClassifier Sparse"};
    for (int i = 0; i < trainSets.length; i++) {
        timer.reset();
        timer.start();

        KnnClassifier knn = new KnnClassifier()
            .setK(3)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME);

        BatchOperator<?> predictions = knn.fit(trainSets[i]).transform(testSets[i]);
        predictions.link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics(knnTitles[i])
        );

        BatchOperator.execute();
        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }

    // --- Experiment group 2: PCA trained in the pipeline, then KNN. ---
    String[] pcaTitles = {"Knn with PCA Dense", "Knn with PCA Sparse"};
    for (int i = 0; i < trainSets.length; i++) {
        timer.reset();
        timer.start();

        // PCA uses VECTOR_COL_NAME as both its vector column and its prediction column.
        Pipeline pcaThenKnn = new Pipeline()
            .add(
                new PCA()
                    .setK(39)
                    .setCalculationType(CalculationType.COV)
                    .setVectorCol(VECTOR_COL_NAME)
                    .setPredictionCol(VECTOR_COL_NAME)
            )
            .add(
                new KnnClassifier()
                    .setK(3)
                    .setVectorCol(VECTOR_COL_NAME)
                    .setLabelCol(LABEL_COL_NAME)
                    .setPredictionCol(PREDICTION_COL_NAME)
            );

        BatchOperator<?> predictions = pcaThenKnn.fit(trainSets[i]).transform(testSets[i]);
        predictions.link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics(pcaTitles[i])
        );

        BatchOperator.execute();
        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }

    // --- Experiment group 3: PCAModel with model data loaded from file, then KNN. ---
    String[] pcaModelTitles = {"Knn PCAModel Dense", "Knn PCAModel Sparse"};
    for (int i = 0; i < trainSets.length; i++) {
        timer.reset();
        timer.start();

        // A fresh model-data source is created for every run.
        Pipeline storedPcaThenKnn = new Pipeline()
            .add(
                new PCAModel()
                    .setVectorCol(VECTOR_COL_NAME)
                    .setPredictionCol(VECTOR_COL_NAME)
                    .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + PCA_MODEL_FILE))
            )
            .add(
                new KnnClassifier()
                    .setK(3)
                    .setVectorCol(VECTOR_COL_NAME)
                    .setLabelCol(LABEL_COL_NAME)
                    .setPredictionCol(PREDICTION_COL_NAME)
            );

        BatchOperator<?> predictions = storedPcaThenKnn.fit(trainSets[i]).transform(testSets[i]);
        predictions.link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics(pcaModelTitles[i])
        );

        BatchOperator.execute();
        timer.stop();
        System.out.println(timer.getElapsedTimeSpan());
    }
}
Use of com.alibaba.alink.pipeline.feature.PCA in the Alink project by Alibaba.
Class Chap19, method c_1.
/**
 * Fits a 4-component PCA on the crime columns of the in-memory crime data, expands the
 * resulting vector column into prin1..prin4, and lazily prints the original data, the
 * projected data, and two views ordered by prin1 and by prin2.
 *
 * @throws Exception declared by the method signature; raised by the calls in the body
 */
static void c_1() throws Exception {
    MemSourceBatchOp source = new MemSourceBatchOp(CRIME_ROWS_DATA, CRIME_COL_NAMES);
    source.lazyPrint(10, "Origin data");

    // PCA is fitted on and applied to the same source; model info is printed lazily.
    BatchOperator<?> projected = new PCA()
        .setK(4)
        .setSelectedCols("murder", "rape", "robbery", "assault", "burglary", "larceny", "auto")
        .setPredictionCol(VECTOR_COL_NAME)
        .enableLazyPrintModelInfo()
        .fit(source)
        .transform(source);

    // Split the prediction vector into four double columns, keeping only "state" alongside.
    BatchOperator<?> pca_result = projected
        .link(
            new VectorToColumnsBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setSchemaStr("prin1 double, prin2 double, prin3 double, prin4 double")
                .setReservedCols("state")
        )
        .lazyPrint(10, "state with principle components");

    // NOTE(review): orderBy(col, 100, false) presumably means "limit 100, descending" —
    // confirm against the BatchOperator.orderBy signature.
    BatchOperator<?> rankedByPrin1 = pca_result
        .select("state, prin1")
        .orderBy("prin1", 100, false);
    rankedByPrin1.lazyPrint(-1, "Order by prin1");

    BatchOperator<?> rankedByPrin2 = pca_result
        .select("state, prin2")
        .orderBy("prin2", 100, false);
    rankedByPrin2.lazyPrint(-1, "Order by prin2");

    // Lazy outputs registered above are produced when the batch job is executed.
    BatchOperator.execute();
}
Aggregations