Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.
Class Chap03, method c_2_3_1.
/**
 * Writes the CSV data at {@code IRIS_HTTP_URL} to an {@code iris.ak} file on three
 * targets (local directory, HDFS, OSS) and reads each file back.
 *
 * <p>The first pass uses batch operators and prints the value returned by
 * {@code count()} for each written file; the second pass uses stream operators
 * and prints the rows matching {@code sepal_length < 4.5}.
 *
 * @throws Exception if any of the batch or stream jobs fails
 */
static void c_2_3_1() throws Exception {
    HadoopFileSystem hadoopFs = new HadoopFileSystem(HADOOP_VERSION, HDFS_URI);
    OssFileSystem ossFs = new OssFileSystem(
        OSS_VERSION, OSS_END_POINT, OSS_BUCKET_NAME, OSS_ACCESS_ID, OSS_ACCESS_KEY);

    // One target per file system: local, HDFS, OSS.
    FilePath[] targets = new FilePath[] {
        new FilePath(LOCAL_DIR + "iris.ak"),
        new FilePath(HDFS_URI + "user/yangxu/alink/data/temp/iris.ak", hadoopFs),
        new FilePath(OSS_PREFIX_URI + "alink/data/temp/iris.ak", ossFs)
    };

    // Batch pass: write the .ak file, run the job, then read it back and print its count.
    for (FilePath target : targets) {
        CsvSourceBatchOp csvInput = new CsvSourceBatchOp()
            .setFilePath(IRIS_HTTP_URL)
            .setSchemaStr(IRIS_SCHEMA_STR);
        AkSinkBatchOp akOutput = new AkSinkBatchOp()
            .setFilePath(target)
            .setOverwriteSink(true);
        csvInput.link(akOutput);
        BatchOperator.execute();

        System.out.println(new AkSourceBatchOp().setFilePath(target).count());
    }

    // Stream pass: the write and the filtered read-back are executed as two separate jobs.
    for (FilePath target : targets) {
        CsvSourceStreamOp csvInput = new CsvSourceStreamOp()
            .setFilePath(IRIS_HTTP_URL)
            .setSchemaStr(IRIS_SCHEMA_STR);
        AkSinkStreamOp akOutput = new AkSinkStreamOp()
            .setFilePath(target)
            .setOverwriteSink(true);
        csvInput.link(akOutput);
        StreamOperator.execute();

        new AkSourceStreamOp()
            .setFilePath(target)
            .filter("sepal_length < 4.5")
            .print();
        StreamOperator.execute();
    }
}
Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.
Class Chap04, method c_3.
/**
 * Round-trips the CSV data at {@code IRIS_URL} through a Derby catalog.
 *
 * <p>Steps: open the catalog, create the database (ignored if it exists), drop
 * leftover batch/stream tables, write the data with a batch sink and a stream
 * sink, read both tables back, list the tables, drop them, list again, and
 * finally drop the database.
 *
 * <p>The catalog is closed in a {@code finally} block so it is released even
 * when one of the jobs or catalog calls throws.
 *
 * @throws Exception if a job or a catalog operation fails
 */
static void c_3() throws Exception {
    DerbyCatalog derby = new DerbyCatalog("derby_catalog", null, DERBY_VERSION, DATA_DIR + DERBY_DIR);
    ObjectPath batchTable = new ObjectPath(DB_NAME, BATCH_TABLE_NAME);
    ObjectPath streamTable = new ObjectPath(DB_NAME, STREAM_TABLE_NAME);

    derby.open();
    try {
        derby.createDatabase(DB_NAME, new CatalogDatabaseImpl(new HashMap<>(), ""), true);

        // Start from a clean slate; 'true' means a missing table is not an error.
        derby.dropTable(batchTable, true);
        derby.dropTable(streamTable, true);

        // Write the CSV data into the batch table.
        new CsvSourceBatchOp()
            .setFilePath(IRIS_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .lazyPrintStatistics("< origin data >")
            .link(new CatalogSinkBatchOp().setCatalogObject(new CatalogObject(derby, batchTable)));
        BatchOperator.execute();

        // Write the CSV data into the stream table.
        new CsvSourceStreamOp()
            .setFilePath(IRIS_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .link(new CatalogSinkStreamOp().setCatalogObject(new CatalogObject(derby, streamTable)));
        StreamOperator.execute();

        // Read the batch table back and print its statistics.
        new CatalogSourceBatchOp()
            .setCatalogObject(new CatalogObject(derby, batchTable))
            .lazyPrintStatistics("< batch catalog source >");
        BatchOperator.execute();

        // Read the stream table back and print the result of sample(0.02).
        new CatalogSourceStreamOp()
            .setCatalogObject(new CatalogObject(derby, streamTable))
            .sample(0.02)
            .print();
        StreamOperator.execute();

        System.out.println("< tables before drop >");
        System.out.println(JsonConverter.toJson(derby.listTables(DB_NAME)));

        // Two ways to drop safely: explicit existence check, or the ignore-if-missing flag.
        if (derby.tableExists(batchTable)) {
            derby.dropTable(batchTable, false);
        }
        derby.dropTable(streamTable, true);

        System.out.println("< tables after drop >");
        System.out.println(JsonConverter.toJson(derby.listTables(DB_NAME)));

        derby.dropDatabase(DB_NAME, true);
    } finally {
        // Always release the catalog, including on failure paths.
        derby.close();
    }
}
Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.
Class Chap04, method c_4.
/**
 * Round-trips the CSV data at {@code IRIS_URL} through a MySQL catalog.
 *
 * <p>Does nothing when {@code MYSQL_URL} is {@code null}. Otherwise: open the
 * catalog, create the database (ignored if it exists), write the data with a
 * batch sink and a stream sink, read both tables back, list the tables, drop
 * them, list again, and finally drop the database.
 *
 * <p>The catalog is closed in a {@code finally} block so the connection is
 * released even when one of the jobs or catalog calls throws.
 *
 * @throws Exception if a job or a catalog operation fails
 */
static void c_4() throws Exception {
    // No MySQL instance configured: skip the example.
    if (null == MYSQL_URL) {
        return;
    }

    MySqlCatalog mySql = new MySqlCatalog(
        "mysql_catalog", null, MYSQL_VERSION, MYSQL_URL, MYSQL_PORT, MYSQL_USER_NAME, MYSQL_PASSWORD);
    ObjectPath batchTable = new ObjectPath(DB_NAME, BATCH_TABLE_NAME);
    ObjectPath streamTable = new ObjectPath(DB_NAME, STREAM_TABLE_NAME);

    mySql.open();
    try {
        mySql.createDatabase(DB_NAME, new CatalogDatabaseImpl(new HashMap<>(), ""), true);

        // Write the CSV data into the batch table.
        new CsvSourceBatchOp()
            .setFilePath(IRIS_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .lazyPrintStatistics("< origin data >")
            .link(new CatalogSinkBatchOp().setCatalogObject(new CatalogObject(mySql, batchTable)));
        BatchOperator.execute();

        // Write the CSV data into the stream table.
        new CsvSourceStreamOp()
            .setFilePath(IRIS_URL)
            .setSchemaStr(IRIS_SCHEMA_STR)
            .link(new CatalogSinkStreamOp().setCatalogObject(new CatalogObject(mySql, streamTable)));
        StreamOperator.execute();

        // Read the batch table back and print its statistics.
        new CatalogSourceBatchOp()
            .setCatalogObject(new CatalogObject(mySql, batchTable))
            .lazyPrintStatistics("< batch catalog source >");
        BatchOperator.execute();

        // Read the stream table back and print the result of sample(0.02).
        new CatalogSourceStreamOp()
            .setCatalogObject(new CatalogObject(mySql, streamTable))
            .sample(0.02)
            .print();
        StreamOperator.execute();

        System.out.println("< tables before drop >");
        System.out.println(JsonConverter.toJson(mySql.listTables(DB_NAME)));

        // Two ways to drop safely: explicit existence check, or the ignore-if-missing flag.
        if (mySql.tableExists(batchTable)) {
            mySql.dropTable(batchTable, false);
        }
        mySql.dropTable(streamTable, true);

        System.out.println("< tables after drop >");
        System.out.println(JsonConverter.toJson(mySql.listTables(DB_NAME)));

        mySql.dropDatabase(DB_NAME, true);
    } finally {
        // Always release the catalog, including on failure paths.
        mySql.close();
    }
}
Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.
Class Chap07, method c_1_1.
/**
 * Prints the first five rows of the CSV file at {@code DATA_DIR + ORIGIN_FILE}
 * twice: once by linking an explicit {@code FirstNBatchOp}, once through the
 * {@code firstN} shortcut on the source.
 *
 * @throws Exception if reading or printing the data fails
 */
static void c_1_1() throws Exception {
    CsvSourceBatchOp csvData = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(SCHEMA_STRING);

    // Variant 1: explicit operator linked to the source.
    FirstNBatchOp headOp = new FirstNBatchOp().setSize(5);
    csvData.link(headOp).print();

    // Variant 2: shortcut method on the source.
    csvData.firstN(5).print();
}
Use of com.alibaba.alink.operator.batch.source.CsvSourceBatchOp in project Alink by Alibaba.
Class Chap07, method c_4_2.
/**
 * Assembles the columns in {@code FEATURE_COL_NAMES} into the vector column
 * {@code VECTOR_COL_NAME} (keeping {@code LABEL_COL_NAME}), applies
 * {@code VectorNormalizeBatchOp} with {@code p = 1.0} to that column, and
 * prints the first five rows.
 *
 * @throws Exception if reading, transforming or printing the data fails
 */
static void c_4_2() throws Exception {
    CsvSourceBatchOp csvData = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(SCHEMA_STRING);

    VectorAssemblerBatchOp assembler = new VectorAssemblerBatchOp()
        .setSelectedCols(FEATURE_COL_NAMES)
        .setOutputCol(VECTOR_COL_NAME)
        .setReservedCols(LABEL_COL_NAME);
    BatchOperator<?> assembled = csvData.link(assembler);

    VectorNormalizeBatchOp normalizer = new VectorNormalizeBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .setP(1.0);
    assembled.link(normalizer).firstN(5).print();
}
Aggregations