Use of com.alibaba.alink.operator.batch.sink.CsvSinkBatchOp in project Alink by alibaba.
From the class CsvSourceSinkTest, method testBatchCsvSinkAndSource:
@Category(DbTest.class)
@Test
public void testBatchCsvSinkAndSource() throws Exception {
    String filePath = path + "file1.csv";
    String[] columnNames = new String[] {"id", "content"};
    BatchOperator<?> data = new MemSourceBatchOp(rows, columnNames);

    // Write the in-memory rows to CSV, overwriting any existing file.
    CsvSinkBatchOp sink = new CsvSinkBatchOp()
        .setFilePath(filePath)
        .setOverwriteSink(true);
    data.link(sink);
    BatchOperator.execute();

    // Check the written file line by line against the expected contents.
    List<String> lines = Files.readAllLines(Paths.get(filePath));
    lines.forEach(line -> {
        int pos = line.indexOf(',');
        String key = line.substring(0, pos);
        Assert.assertEquals(line, actual.get(key));
    });

    // Read the file back and verify every row matches the original data.
    BatchOperator<?> source = new CsvSourceBatchOp()
        .setFilePath(filePath)
        .setSchemaStr("id bigint, content string");
    List<Row> result = source.collect();
    Assert.assertEquals(6, result.size());
    for (Row row : result) {
        boolean found = false;
        for (Row ref : rows) {
            if (row.getField(0).equals(ref.getField(0))) {
                Assert.assertEquals(row.getField(1), ref.getField(1));
                found = true;
                break;
            }
        }
        Assert.assertTrue(found);
    }
}
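The test above relies on fixture fields (path, rows, actual) defined elsewhere in CsvSourceSinkTest. A self-contained sketch of the same sink-then-source round trip, with hypothetical sample rows and a placeholder temp path, might look like this:

import java.util.Arrays;
import java.util.List;
import org.apache.flink.types.Row;
import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.sink.CsvSinkBatchOp;
import com.alibaba.alink.operator.batch.source.CsvSourceBatchOp;
import com.alibaba.alink.operator.batch.source.MemSourceBatchOp;

public class CsvRoundTripSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical sample data; the test's `rows` fixture is defined elsewhere.
        List<Row> rows = Arrays.asList(Row.of(1L, "hello"), Row.of(2L, "world"));
        String filePath = "/tmp/round_trip.csv"; // placeholder path

        // Write the rows out as CSV, overwriting any existing file.
        new MemSourceBatchOp(rows, new String[] {"id", "content"})
            .link(new CsvSinkBatchOp().setFilePath(filePath).setOverwriteSink(true));
        BatchOperator.execute();

        // Read the file back with an explicit schema and print it.
        new CsvSourceBatchOp()
            .setFilePath(filePath)
            .setSchemaStr("id bigint, content string")
            .print();
    }
}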
Use of com.alibaba.alink.operator.batch.sink.CsvSinkBatchOp in project Alink by alibaba.
From the class PipelineSaveAndLoadTest, method test3:
@Test
public void test3() throws Exception {
    // Save model data to file (ModelBase).
    String modelFilename = "/tmp/model12341.csv";
    CsvSourceBatchOp source = new CsvSourceBatchOp()
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
        .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");
    new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("petal_length")
        .fit(source).getModelData()
        .link(new CsvSinkBatchOp().setFilePath(modelFilename).setOverwriteSink(true));
    BatchOperator.execute();

    // Build a pipeline from a fitted model, a stateless transformer,
    // and a model reconstructed from the saved model data.
    QuantileDiscretizerModel model1 = new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("sepal_length").fit(source);
    Binarizer model2 = new Binarizer().setSelectedCol("petal_width").setThreshold(1.);
    CsvSourceBatchOp modelData = new CsvSourceBatchOp()
        .setFilePath(modelFilename)
        .setSchemaStr("model_id BIGINT, model_info STRING");
    QuantileDiscretizerModel model3 = new QuantileDiscretizerModel().setSelectedCols("petal_length").setModelData(modelData);
    CsvSourceStreamOp streamSource = new CsvSourceStreamOp()
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
        .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");

    // Save the pipeline model, reload it, and apply it to a stream source.
    PipelineModel pipelineModel = new PipelineModel(model1, model2, model3);
    pipelineModel = PipelineModel.collectLoad(pipelineModel.save());
    pipelineModel.transform(streamSource).print();
    StreamOperator.execute();
}
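The crux of the test is the save-and-reload round trip: save() exports the fitted pipeline as a batch operator of model rows, and collectLoad() materializes those rows back into a ready-to-use PipelineModel. A minimal sketch of that pattern in isolation, assuming a fitted pipelineModel and a streamSource as above:

// Export the pipeline as model rows and rebuild it eagerly in one step.
PipelineModel reloaded = PipelineModel.collectLoad(pipelineModel.save());
// The reloaded model transforms batch or stream data just like the original.
reloaded.transform(streamSource).print();
StreamOperator.execute();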
Use of com.alibaba.alink.operator.batch.sink.CsvSinkBatchOp in project Alink by alibaba.
From the class Chap01, method c_5_1:
static void c_5_1() throws Exception {
    CsvSourceBatchOp source = new CsvSourceBatchOp()
        .setFilePath("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data")
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string");
    // Preview the first five rows.
    source.firstN(5).print();
    // Draw a 10-row sample and write it out as CSV.
    source.sampleWithSize(10)
        .link(new CsvSinkBatchOp().setFilePath(DATA_DIR + "iris_10.data").setOverwriteSink(true));
    BatchOperator.execute();
}
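A natural follow-up, not shown in this excerpt, is to read the sampled file back with CsvSourceBatchOp and the same schema string; a sketch, assuming DATA_DIR is defined as in the chapter:

// Read the 10-row sample back; print() triggers batch execution and shows the rows.
new CsvSourceBatchOp()
    .setFilePath(DATA_DIR + "iris_10.data")
    .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
    .print();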
Use of com.alibaba.alink.operator.batch.sink.CsvSinkBatchOp in project Alink by alibaba.
From the class Chap03, method c_2_1_2:
static void c_2_1_2() throws Exception {
    HadoopFileSystem hdfs = new HadoopFileSystem(HADOOP_VERSION, HDFS_URI);
    OssFileSystem oss = new OssFileSystem(OSS_VERSION, OSS_END_POINT, OSS_BUCKET_NAME, OSS_ACCESS_ID, OSS_ACCESS_KEY);
    FilePath[] filePaths = new FilePath[] {
        new FilePath(LOCAL_DIR + "iris.csv"),
        new FilePath(HDFS_URI + "user/yangxu/alink/data/temp/iris.csv", hdfs),
        new FilePath(OSS_PREFIX_URI + "alink/data/temp/iris.csv", oss)
    };
    // Batch: copy the iris data to each file system, then count the rows read back.
    for (FilePath filePath : filePaths) {
        new CsvSourceBatchOp().setFilePath(IRIS_HTTP_URL).setSchemaStr(IRIS_SCHEMA_STR)
            .link(new CsvSinkBatchOp().setFilePath(filePath).setOverwriteSink(true));
        BatchOperator.execute();
        System.out.println(new CsvSourceBatchOp().setFilePath(filePath).setSchemaStr(IRIS_SCHEMA_STR).count());
    }
    // Stream: do the same copy, then read each file back and print the filtered rows.
    for (FilePath filePath : filePaths) {
        new CsvSourceStreamOp().setFilePath(IRIS_HTTP_URL).setSchemaStr(IRIS_SCHEMA_STR)
            .link(new CsvSinkStreamOp().setFilePath(filePath).setOverwriteSink(true));
        StreamOperator.execute();
        new CsvSourceStreamOp().setFilePath(filePath).setSchemaStr(IRIS_SCHEMA_STR)
            .filter("sepal_length < 4.5").print();
        StreamOperator.execute();
    }
}
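The pattern that matters here is FilePath: wrapping a path together with a file-system object routes the same CsvSinkBatchOp to local disk, HDFS, or OSS with no other change to the pipeline. Distilled to just the HDFS case, using the same constants the chapter defines:

HadoopFileSystem hdfs = new HadoopFileSystem(HADOOP_VERSION, HDFS_URI);
FilePath hdfsPath = new FilePath(HDFS_URI + "user/yangxu/alink/data/temp/iris.csv", hdfs);

// The sink is identical to the local-file case; only the FilePath differs.
new CsvSourceBatchOp().setFilePath(IRIS_HTTP_URL).setSchemaStr(IRIS_SCHEMA_STR)
    .link(new CsvSinkBatchOp().setFilePath(hdfsPath).setOverwriteSink(true));
BatchOperator.execute();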