Search in sources:

Example 1 with CsvSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.

In the class Chap03, method c_2_3_1.

/**
 * Writes the iris CSV data to ".ak" files at three locations (a local path,
 * an HDFS path and an OSS path) and reads each file back.
 *
 * <p>The first pass uses batch operators and prints the row count of every
 * written file; the second pass uses stream operators and prints the rows
 * matching {@code sepal_length < 4.5}.
 *
 * @throws Exception if any of the batch or stream jobs fails
 */
static void c_2_3_1() throws Exception {
    HadoopFileSystem hadoopFs = new HadoopFileSystem(HADOOP_VERSION, HDFS_URI);
    OssFileSystem ossFs = new OssFileSystem(OSS_VERSION, OSS_END_POINT, OSS_BUCKET_NAME, OSS_ACCESS_ID, OSS_ACCESS_KEY);

    FilePath[] targets = new FilePath[] {
        new FilePath(LOCAL_DIR + "iris.ak"),
        new FilePath(HDFS_URI + "user/yangxu/alink/data/temp/iris.ak", hadoopFs),
        new FilePath(OSS_PREFIX_URI + "alink/data/temp/iris.ak", ossFs)
    };

    // Batch pass: write each target, run the job, then read the file back and count its rows.
    for (int i = 0; i < targets.length; i++) {
        FilePath target = targets[i];
        CsvSourceBatchOp irisBatch = new CsvSourceBatchOp()
            .setFilePath(IRIS_HTTP_URL)
            .setSchemaStr(IRIS_SCHEMA_STR);
        AkSinkBatchOp batchSink = new AkSinkBatchOp()
            .setFilePath(target)
            .setOverwriteSink(true);
        irisBatch.link(batchSink);
        BatchOperator.execute();

        AkSourceBatchOp batchReader = new AkSourceBatchOp().setFilePath(target);
        System.out.println(batchReader.count());
    }

    // Stream pass: write each target in one job, then read and filter it in a second job.
    for (int i = 0; i < targets.length; i++) {
        FilePath target = targets[i];
        CsvSourceStreamOp irisStream = new CsvSourceStreamOp()
            .setFilePath(IRIS_HTTP_URL)
            .setSchemaStr(IRIS_SCHEMA_STR);
        AkSinkStreamOp streamSink = new AkSinkStreamOp()
            .setFilePath(target)
            .setOverwriteSink(true);
        irisStream.link(streamSink);
        StreamOperator.execute();

        AkSourceStreamOp streamReader = new AkSourceStreamOp().setFilePath(target);
        streamReader.filter("sepal_length < 4.5").print();
        StreamOperator.execute();
    }
}
Also used : FilePath(com.alibaba.alink.common.io.filesystem.FilePath) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSourceStreamOp(com.alibaba.alink.operator.stream.source.AkSourceStreamOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) HadoopFileSystem(com.alibaba.alink.common.io.filesystem.HadoopFileSystem) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp) OssFileSystem(com.alibaba.alink.common.io.filesystem.OssFileSystem) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)

Example 2 with CsvSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.

In the class Chap04, method c_3.

/**
 * Round-trips the iris data through a Derby catalog with both batch and
 * stream operators, then cleans up the tables and the database.
 *
 * <p>Steps: open the catalog, create the database, drop any leftover tables,
 * sink the CSV data into a batch table and a stream table, read both back,
 * list the tables before and after dropping them, and drop the database.
 *
 * <p>The catalog is closed in a {@code finally} block so that a failing job
 * or catalog call does not leave it open.
 *
 * @throws Exception if a catalog operation or an Alink job fails
 */
static void c_3() throws Exception {
    DerbyCatalog derby = new DerbyCatalog("derby_catalog", null, DERBY_VERSION, DATA_DIR + DERBY_DIR);
    // Both table paths are reused by every catalog call below.
    ObjectPath batchTable = new ObjectPath(DB_NAME, BATCH_TABLE_NAME);
    ObjectPath streamTable = new ObjectPath(DB_NAME, STREAM_TABLE_NAME);

    derby.open();
    try {
        derby.createDatabase(DB_NAME, new CatalogDatabaseImpl(new HashMap<>(), ""), true);
        // Start from a clean state: remove tables left over from a previous run.
        derby.dropTable(batchTable, true);
        derby.dropTable(streamTable, true);

        // Write the CSV data into the catalog, once per execution mode.
        new CsvSourceBatchOp()
            .setFilePath(IRIS_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .lazyPrintStatistics("< origin data >")
            .link(new CatalogSinkBatchOp().setCatalogObject(new CatalogObject(derby, batchTable)));
        BatchOperator.execute();

        new CsvSourceStreamOp()
            .setFilePath(IRIS_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .link(new CatalogSinkStreamOp().setCatalogObject(new CatalogObject(derby, streamTable)));
        StreamOperator.execute();

        // Read both tables back from the catalog.
        new CatalogSourceBatchOp()
            .setCatalogObject(new CatalogObject(derby, batchTable))
            .lazyPrintStatistics("< batch catalog source >");
        BatchOperator.execute();

        new CatalogSourceStreamOp()
            .setCatalogObject(new CatalogObject(derby, streamTable))
            .sample(0.02)
            .print();
        StreamOperator.execute();

        System.out.println("< tables before drop >");
        System.out.println(JsonConverter.toJson(derby.listTables(DB_NAME)));

        // Two drop styles on purpose: explicit existence check vs. the boolean flag.
        if (derby.tableExists(batchTable)) {
            derby.dropTable(batchTable, false);
        }
        derby.dropTable(streamTable, true);

        System.out.println("< tables after drop >");
        System.out.println(JsonConverter.toJson(derby.listTables(DB_NAME)));

        derby.dropDatabase(DB_NAME, true);
    } finally {
        // Always release the catalog, even when a job or catalog call above throws.
        derby.close();
    }
}
Also used : CatalogSinkBatchOp(com.alibaba.alink.operator.batch.sink.CatalogSinkBatchOp) CatalogSourceBatchOp(com.alibaba.alink.operator.batch.source.CatalogSourceBatchOp) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogSourceStreamOp(com.alibaba.alink.operator.stream.source.CatalogSourceStreamOp) HashMap(java.util.HashMap) DerbyCatalog(com.alibaba.alink.common.io.catalog.DerbyCatalog) CatalogSinkStreamOp(com.alibaba.alink.operator.stream.sink.CatalogSinkStreamOp) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp) CatalogDatabaseImpl(org.apache.flink.table.catalog.CatalogDatabaseImpl) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp) CatalogObject(com.alibaba.alink.params.io.HasCatalogObject.CatalogObject)

Example 3 with CsvSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.

In the class Chap04, method c_4.

/**
 * Round-trips the iris data through a MySQL catalog with both batch and
 * stream operators, then cleans up the tables and the database.
 *
 * <p>Does nothing when {@code MYSQL_URL} is {@code null}. Otherwise the
 * catalog is opened, used, and closed in a {@code finally} block so that a
 * failing job or catalog call does not leave it open.
 *
 * @throws Exception if a catalog operation or an Alink job fails
 */
static void c_4() throws Exception {
    if (null == MYSQL_URL) {
        // No MySQL instance configured: skip the example.
        return;
    }

    MySqlCatalog mySql = new MySqlCatalog("mysql_catalog", null, MYSQL_VERSION, MYSQL_URL, MYSQL_PORT, MYSQL_USER_NAME, MYSQL_PASSWORD);
    // Both table paths are reused by every catalog call below.
    ObjectPath batchTable = new ObjectPath(DB_NAME, BATCH_TABLE_NAME);
    ObjectPath streamTable = new ObjectPath(DB_NAME, STREAM_TABLE_NAME);

    mySql.open();
    try {
        mySql.createDatabase(DB_NAME, new CatalogDatabaseImpl(new HashMap<>(), ""), true);

        // Write the CSV data into the catalog, once per execution mode.
        new CsvSourceBatchOp()
            .setFilePath(IRIS_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .lazyPrintStatistics("< origin data >")
            .link(new CatalogSinkBatchOp().setCatalogObject(new CatalogObject(mySql, batchTable)));
        BatchOperator.execute();

        new CsvSourceStreamOp()
            .setFilePath(IRIS_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .link(new CatalogSinkStreamOp().setCatalogObject(new CatalogObject(mySql, streamTable)));
        StreamOperator.execute();

        // Read both tables back from the catalog.
        new CatalogSourceBatchOp()
            .setCatalogObject(new CatalogObject(mySql, batchTable))
            .lazyPrintStatistics("< batch catalog source >");
        BatchOperator.execute();

        new CatalogSourceStreamOp()
            .setCatalogObject(new CatalogObject(mySql, streamTable))
            .sample(0.02)
            .print();
        StreamOperator.execute();

        System.out.println("< tables before drop >");
        System.out.println(JsonConverter.toJson(mySql.listTables(DB_NAME)));

        // Two drop styles on purpose: explicit existence check vs. the boolean flag.
        if (mySql.tableExists(batchTable)) {
            mySql.dropTable(batchTable, false);
        }
        mySql.dropTable(streamTable, true);

        System.out.println("< tables after drop >");
        System.out.println(JsonConverter.toJson(mySql.listTables(DB_NAME)));

        mySql.dropDatabase(DB_NAME, true);
    } finally {
        // Always release the catalog, even when a job or catalog call above throws.
        mySql.close();
    }
}
Also used : CatalogSinkBatchOp(com.alibaba.alink.operator.batch.sink.CatalogSinkBatchOp) CatalogSourceBatchOp(com.alibaba.alink.operator.batch.source.CatalogSourceBatchOp) ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogSourceStreamOp(com.alibaba.alink.operator.stream.source.CatalogSourceStreamOp) MySqlCatalog(com.alibaba.alink.common.io.catalog.MySqlCatalog) HashMap(java.util.HashMap) CatalogSinkStreamOp(com.alibaba.alink.operator.stream.sink.CatalogSinkStreamOp) CsvSourceStreamOp(com.alibaba.alink.operator.stream.source.CsvSourceStreamOp) CatalogDatabaseImpl(org.apache.flink.table.catalog.CatalogDatabaseImpl) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp) CatalogObject(com.alibaba.alink.params.io.HasCatalogObject.CatalogObject)

Example 4 with CsvSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.

In the class Chap07, method c_1_1.

/**
 * Prints the first five rows of the CSV source twice: once by linking an
 * explicit {@code FirstNBatchOp}, and once through the {@code firstN}
 * shortcut on the source itself.
 *
 * @throws Exception if reading or printing the data fails
 */
static void c_1_1() throws Exception {
    CsvSourceBatchOp source = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(SCHEMA_STRING);

    // Variant 1: explicit operator linked to the source.
    FirstNBatchOp headFive = new FirstNBatchOp().setSize(5);
    source.link(headFive).print();

    // Variant 2: the shortcut method on the source.
    source.firstN(5).print();
}
Also used : FirstNBatchOp(com.alibaba.alink.operator.batch.dataproc.FirstNBatchOp) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)

Example 5 with CsvSourceBatchOp

Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.

In the class Chap07, method c_4_2.

/**
 * Assembles the feature columns of the CSV source into one vector column
 * (keeping the label column), normalizes that vector with {@code p = 1.0},
 * and prints the first five resulting rows.
 *
 * @throws Exception if reading, transforming or printing the data fails
 */
static void c_4_2() throws Exception {
    CsvSourceBatchOp csvInput = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(SCHEMA_STRING);

    VectorAssemblerBatchOp assembler = new VectorAssemblerBatchOp()
        .setSelectedCols(FEATURE_COL_NAMES)
        .setOutputCol(VECTOR_COL_NAME)
        .setReservedCols(LABEL_COL_NAME);

    BatchOperator<?> source = csvInput.link(assembler);

    VectorNormalizeBatchOp normalizer = new VectorNormalizeBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .setP(1.0);

    source.link(normalizer).firstN(5).print();
}
Also used : VectorAssemblerBatchOp(com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp) VectorNormalizeBatchOp(com.alibaba.alink.operator.batch.dataproc.vector.VectorNormalizeBatchOp) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)

Aggregations

CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)67 Test (org.junit.Test)30 Category (org.junit.experimental.categories.Category)17 CsvSourceStreamOp (com.alibaba.alink.operator.stream.source.CsvSourceStreamOp)15 DLTest (com.alibaba.alink.testutil.categories.DLTest)14 PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader)8 RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey)8 ShuffleBatchOp (com.alibaba.alink.operator.batch.dataproc.ShuffleBatchOp)7 AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp)6 File (java.io.File)6 HashMap (java.util.HashMap)6 Row (org.apache.flink.types.Row)6 AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp)5 Binarizer (com.alibaba.alink.pipeline.feature.Binarizer)5 QuantileDiscretizer (com.alibaba.alink.pipeline.feature.QuantileDiscretizer)5 FilePath (com.alibaba.alink.common.io.filesystem.FilePath)4 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)4 CsvSinkBatchOp (com.alibaba.alink.operator.batch.sink.CsvSinkBatchOp)4 QuantileDiscretizerModel (com.alibaba.alink.pipeline.feature.QuantileDiscretizerModel)4 Params (org.apache.flink.ml.api.misc.param.Params)4