use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap17 method c_2_2.
/**
 * Clusters the vector data with K-means (k = 2) in two ways and prints the results.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the file at {@code DATA_DIR + VECTOR_FILE} does not exist, links
 *       {@code CsvSourceBatchOp -> VectorAssemblerBatchOp -> AkSinkBatchOp} to create it
 *       from {@code DATA_DIR + ORIGIN_FILE} and executes that job.</li>
 *   <li>Trains and predicts with the batch K-means operators (no distance type is set
 *       explicitly; metrics are printed under the title "KMeans EUCLIDEAN").</li>
 *   <li>Trains and predicts with the {@code KMeans} estimator using
 *       {@code DistanceType.COSINE} (metrics printed under "KMeans COSINE").</li>
 * </ol>
 *
 * @throws Exception propagated from the Alink operators or from {@code BatchOperator.execute()}
 */
static void c_2_2() throws Exception {
    final String vectorFilePath = DATA_DIR + VECTOR_FILE;

    // Build the vector file only when it is not on disk yet.
    final boolean vectorFileMissing = !new File(vectorFilePath).exists();
    if (vectorFileMissing) {
        CsvSourceBatchOp csvSource = new CsvSourceBatchOp()
            .setFilePath(DATA_DIR + ORIGIN_FILE)
            .setSchemaStr(SCHEMA_STRING);
        VectorAssemblerBatchOp assembler = new VectorAssemblerBatchOp()
            .setSelectedCols(FEATURE_COL_NAMES)
            .setOutputCol(VECTOR_COL_NAME)
            .setReservedCols(LABEL_COL_NAME);
        csvSource
            .link(assembler)
            .link(new AkSinkBatchOp().setFilePath(vectorFilePath));
        BatchOperator.execute();
    }

    AkSourceBatchOp vectorData = new AkSourceBatchOp().setFilePath(vectorFilePath);
    vectorData.lazyPrint(5);

    // First run: explicit train / predict batch operators.
    KMeansTrainBatchOp trainer = new KMeansTrainBatchOp()
        .setK(2)
        .setVectorCol(VECTOR_COL_NAME);
    KMeansPredictBatchOp predictor = new KMeansPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME);

    vectorData.link(trainer);
    predictor.linkFrom(trainer, vectorData);

    trainer.lazyPrintModelInfo();
    predictor.lazyPrint(5);

    EvalClusterBatchOp euclideanEval = new EvalClusterBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);
    predictor.link(euclideanEval.lazyPrintMetrics("KMeans EUCLIDEAN"));

    final String sortClause = PREDICTION_COL_NAME + ", " + LABEL_COL_NAME;
    predictor
        .orderBy(sortClause, 200, false)
        .lazyPrint(-1, "all data");
    BatchOperator.execute();

    // Second run: KMeans estimator with cosine distance.
    KMeans cosineKMeans = new KMeans()
        .setK(2)
        .setDistanceType(DistanceType.COSINE)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);
    BatchOperator<?> cosinePrediction = cosineKMeans
        .enableLazyPrintModelInfo()
        .fit(vectorData)
        .transform(vectorData);

    EvalClusterBatchOp cosineEval = new EvalClusterBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setLabelCol(LABEL_COL_NAME);
    cosinePrediction.link(cosineEval.lazyPrintMetrics("KMeans COSINE"));
    BatchOperator.execute();
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap18 method c_1.
/**
 * Fits three clustering pipelines (KMeans without an explicit distance type, KMeans with
 * {@code DistanceType.COSINE}, and BisectingKMeans, all with k = 10) on the dense and
 * then the sparse training file, prints cluster metrics for each run, and prints the
 * elapsed time reported by the stopwatch after each run.
 *
 * @throws Exception propagated from the Alink pipeline stages or {@code BatchOperator.execute()}
 */
static void c_1() throws Exception {
    AkSourceBatchOp denseSource = new AkSourceBatchOp().setFilePath(DATA_DIR + DENSE_TRAIN_FILE);
    AkSourceBatchOp sparseSource = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);

    Stopwatch timer = new Stopwatch();

    // (title, pipeline) pairs, evaluated in insertion order.
    ArrayList<Tuple2<String, Pipeline>> candidates = new ArrayList<>();
    candidates.add(new Tuple2<>(
        "KMeans EUCLIDEAN",
        new Pipeline().add(
            new KMeans()
                .setK(10)
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))));
    candidates.add(new Tuple2<>(
        "KMeans COSINE",
        new Pipeline().add(
            new KMeans()
                .setDistanceType(DistanceType.COSINE)
                .setK(10)
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))));
    candidates.add(new Tuple2<>(
        "BisectingKMeans",
        new Pipeline().add(
            new BisectingKMeans()
                .setK(10)
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))));

    // Each pipeline is run on the dense data first, then on the sparse data.
    final AkSourceBatchOp[] dataSets = {denseSource, sparseSource};
    final String[] titleSuffixes = {" DENSE", " SPARSE"};

    for (Tuple2<String, Pipeline> candidate : candidates) {
        for (int i = 0; i < dataSets.length; i++) {
            final AkSourceBatchOp data = dataSets[i];

            timer.reset();
            timer.start();
            candidate.f1
                .fit(data)
                .transform(data)
                .link(
                    new EvalClusterBatchOp()
                        .setVectorCol(VECTOR_COL_NAME)
                        .setPredictionCol(PREDICTION_COL_NAME)
                        .setLabelCol(LABEL_COL_NAME)
                        .lazyPrintMetrics(candidate.f0 + titleSuffixes[i]));
            BatchOperator.execute();
            timer.stop();
            System.out.println(timer.getElapsedTimeSpan());
        }
    }
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap19 method c_4.
/**
 * Runs six timed KNN classification experiments (k = 3) and prints multi-class metrics
 * plus the stopwatch's elapsed time for each:
 * <ol>
 *   <li>plain KnnClassifier on dense data,</li>
 *   <li>plain KnnClassifier on sparse data,</li>
 *   <li>PCA (k = 39, {@code CalculationType.COV}) followed by KnnClassifier on dense data,</li>
 *   <li>the same PCA + KnnClassifier pipeline on sparse data,</li>
 *   <li>PCAModel loaded from {@code DATA_DIR + PCA_MODEL_FILE} followed by KnnClassifier on dense data,</li>
 *   <li>the same PCAModel + KnnClassifier pipeline on sparse data.</li>
 * </ol>
 * In the PCA variants the PCA output is written to {@code VECTOR_COL_NAME}, the same
 * column it reads from.
 *
 * @throws Exception propagated from the Alink stages or {@code BatchOperator.execute()}
 */
static void c_4() throws Exception {
    AkSourceBatchOp denseTrain = new AkSourceBatchOp().setFilePath(DATA_DIR + DENSE_TRAIN_FILE);
    AkSourceBatchOp denseTest = new AkSourceBatchOp().setFilePath(DATA_DIR + DENSE_TEST_FILE);
    AkSourceBatchOp sparseTrain = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);
    AkSourceBatchOp sparseTest = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE);

    Stopwatch timer = new Stopwatch();

    // 1. Plain KNN, dense vectors.
    timer.reset();
    timer.start();
    new KnnClassifier()
        .setK(3)
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .fit(denseTrain)
        .transform(denseTest)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("KnnClassifier Dense"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // 2. Plain KNN, sparse vectors.
    timer.reset();
    timer.start();
    new KnnClassifier()
        .setK(3)
        .setVectorCol(VECTOR_COL_NAME)
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .fit(sparseTrain)
        .transform(sparseTest)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("KnnClassifier Sparse"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // 3. PCA trained inside the pipeline, then KNN, dense vectors.
    timer.reset();
    timer.start();
    new Pipeline()
        .add(
            new PCA()
                .setK(39)
                .setCalculationType(CalculationType.COV)
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(VECTOR_COL_NAME))
        .add(
            new KnnClassifier()
                .setK(3)
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))
        .fit(denseTrain)
        .transform(denseTest)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("Knn with PCA Dense"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // 4. PCA trained inside the pipeline, then KNN, sparse vectors.
    timer.reset();
    timer.start();
    new Pipeline()
        .add(
            new PCA()
                .setK(39)
                .setCalculationType(CalculationType.COV)
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(VECTOR_COL_NAME))
        .add(
            new KnnClassifier()
                .setK(3)
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))
        .fit(sparseTrain)
        .transform(sparseTest)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("Knn with PCA Sparse"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // 5. PCA model loaded from file, then KNN, dense vectors.
    timer.reset();
    timer.start();
    new Pipeline()
        .add(
            new PCAModel()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(VECTOR_COL_NAME)
                .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + PCA_MODEL_FILE)))
        .add(
            new KnnClassifier()
                .setK(3)
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))
        .fit(denseTrain)
        .transform(denseTest)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("Knn PCAModel Dense"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // 6. PCA model loaded from file, then KNN, sparse vectors.
    timer.reset();
    timer.start();
    new Pipeline()
        .add(
            new PCAModel()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(VECTOR_COL_NAME)
                .setModelData(new AkSourceBatchOp().setFilePath(DATA_DIR + PCA_MODEL_FILE)))
        .add(
            new KnnClassifier()
                .setK(3)
                .setVectorCol(VECTOR_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME))
        .fit(sparseTrain)
        .transform(sparseTest)
        .link(
            new EvalMultiClassBatchOp()
                .setLabelCol(LABEL_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .lazyPrintMetrics("Knn PCAModel Sparse"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap20 method c_3.
/**
 * Grid-searches KMeans over k in {2, 3, 4, 5, 6} and distance type in
 * {EUCLIDEAN, COSINE} using 4 folds and the {@code TuningClusterMetric.RI} metric on the
 * Chap17 vector file, then evaluates the returned model on the same data and prints the
 * cluster metrics and the stopwatch's elapsed time.
 *
 * <p>Note: this calls {@code AlinkGlobalConfiguration.setPrintProcessInfo(true)} and does
 * not reset it afterwards.
 *
 * @throws Exception propagated from the Alink stages or {@code BatchOperator.execute()}
 */
static void c_3() throws Exception {
    Stopwatch timer = new Stopwatch();
    timer.start();

    AlinkGlobalConfiguration.setPrintProcessInfo(true);

    AkSourceBatchOp vectorData = new AkSourceBatchOp()
        .setFilePath(Chap17.DATA_DIR + Chap17.VECTOR_FILE);

    KMeans kmeans = new KMeans()
        .setVectorCol(Chap17.VECTOR_COL_NAME)
        .setPredictionCol(Chap17.PREDICTION_COL_NAME);

    // Candidate parameter values explored by the search.
    ParamGrid searchSpace = new ParamGrid()
        .addGrid(kmeans, KMeans.K, new Integer[] {2, 3, 4, 5, 6})
        .addGrid(
            kmeans,
            KMeans.DISTANCE_TYPE,
            new DistanceType[] {DistanceType.EUCLIDEAN, DistanceType.COSINE});

    ClusterTuningEvaluator scorer = new ClusterTuningEvaluator()
        .setVectorCol(Chap17.VECTOR_COL_NAME)
        .setPredictionCol(Chap17.PREDICTION_COL_NAME)
        .setLabelCol(Chap17.LABEL_COL_NAME)
        .setTuningClusterMetric(TuningClusterMetric.RI);

    GridSearchCV gridSearch = new GridSearchCV()
        .setNumFolds(4)
        .setEstimator(kmeans)
        .setParamGrid(searchSpace)
        .setTuningEvaluator(scorer)
        .enableLazyPrintTrainInfo();

    GridSearchCVModel bestModel = gridSearch.fit(vectorData);

    BatchOperator<?> predictions = bestModel.transform(vectorData);
    EvalClusterBatchOp evaluation = new EvalClusterBatchOp()
        .setLabelCol(Chap17.LABEL_COL_NAME)
        .setVectorCol(Chap17.VECTOR_COL_NAME)
        .setPredictionCol(Chap17.PREDICTION_COL_NAME);
    predictions.link(evaluation.lazyPrintMetrics());
    BatchOperator.execute();

    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap20 method c_1.
/**
 * Grid-searches the {@code LogisticRegression.L_1} parameter of a logistic-regression
 * pipeline over nine values from 1e-7 to 10 using 5 folds and the AUC tuning metric on
 * the Chap10 training data, then evaluates the returned model on the Chap10 test data
 * (positive label value string "2") and prints binary-classification metrics under the
 * title "GridSearchCV".
 *
 * <p>Feature columns are all columns of the selected training data except
 * {@code Chap10.LABEL_COL_NAME}.
 *
 * @throws Exception propagated from the Alink stages or {@code BatchOperator.execute()}
 */
static void c_1() throws Exception {
    BatchOperator<?> trainSet = new AkSourceBatchOp()
        .setFilePath(Chap10.DATA_DIR + Chap10.TRAIN_FILE)
        .select(Chap10.CLAUSE_CREATE_FEATURES);
    BatchOperator<?> testSet = new AkSourceBatchOp()
        .setFilePath(Chap10.DATA_DIR + Chap10.TEST_FILE)
        .select(Chap10.CLAUSE_CREATE_FEATURES);

    // Every column except the label is used as a feature.
    final String[] featureCols =
        ArrayUtils.removeElement(trainSet.getColNames(), Chap10.LABEL_COL_NAME);

    LogisticRegression lr = new LogisticRegression()
        .setFeatureCols(featureCols)
        .setLabelCol(Chap10.LABEL_COL_NAME)
        .setPredictionCol(Chap10.PREDICTION_COL_NAME)
        .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME);
    Pipeline pipeline = new Pipeline().add(lr);

    ParamGrid l1Grid = new ParamGrid()
        .addGrid(
            lr,
            LogisticRegression.L_1,
            new Double[] {0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0});

    BinaryClassificationTuningEvaluator aucEvaluator = new BinaryClassificationTuningEvaluator()
        .setLabelCol(Chap10.LABEL_COL_NAME)
        .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME)
        .setTuningBinaryClassMetric(TuningBinaryClassMetric.AUC);

    GridSearchCV gridSearch = new GridSearchCV()
        .setNumFolds(5)
        .setEstimator(pipeline)
        .setParamGrid(l1Grid)
        .setTuningEvaluator(aucEvaluator)
        .enableLazyPrintTrainInfo();

    GridSearchCVModel bestModel = gridSearch.fit(trainSet);

    BatchOperator<?> predictions = bestModel.transform(testSet);
    EvalBinaryClassBatchOp evaluation = new EvalBinaryClassBatchOp()
        .setPositiveLabelValueString("2")
        .setLabelCol(Chap10.LABEL_COL_NAME)
        .setPredictionDetailCol(Chap10.PRED_DETAIL_COL_NAME);
    predictions.link(evaluation.lazyPrintMetrics("GridSearchCV"));
    BatchOperator.execute();
}
Aggregations