use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class AkExample method main.
/**
 * Demonstrates moving the iris data set between storage back ends through the Ak format:
 * the remote CSV is written to OSS, the OSS copy is written to HDFS, and the first ten rows
 * of the HDFS copy are printed.
 *
 * <p>The OSS and HDFS constructor arguments below are placeholders and must be replaced with
 * a real configuration before running.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if any of the batch jobs fails
 */
public static void main(String[] args) throws Exception {
    String irisCsvUrl = "https://alink-release.oss-cn-beijing.aliyuncs.com/data-files/iris.csv";
    String irisSchema = "sepal_length double, sepal_width double, petal_length double, petal_width double, category string";

    // Note: Complete the parameters below with the right oss configuration.
    BaseFileSystem<?> oss = new OssFileSystem("OssVersion", "OssEndPoint", "OssBucket", "OssId", "OssKey");
    // Note: Complete the parameters below with the right hdfs configuration.
    BaseFileSystem<?> hdfs = new HadoopFileSystem("HadoopVersion", "HdfsFileSystemUri");

    // Step 1: csv -> oss
    CsvSourceBatchOp csvSource = new CsvSourceBatchOp()
        .setFilePath(irisCsvUrl)
        .setSchemaStr(irisSchema);
    AkSinkBatchOp ossSink = new AkSinkBatchOp()
        .setFilePath(new FilePath("iris", oss))
        .setOverwriteSink(true);
    csvSource.link(ossSink);
    BatchOperator.execute();

    // Step 2: oss -> hdfs
    AkSourceBatchOp ossSource = new AkSourceBatchOp()
        .setFilePath(new FilePath("iris", oss));
    AkSinkBatchOp hdfsSink = new AkSinkBatchOp()
        .setFilePath(new FilePath("iris", hdfs))
        .setOverwriteSink(true);
    ossSource.link(hdfsSink);
    BatchOperator.execute();

    // Step 3: hdfs -> stdout (first ten rows only)
    AkSourceBatchOp hdfsSource = new AkSourceBatchOp()
        .setFilePath(new FilePath("iris", hdfs));
    hdfsSource.firstN(10).print();
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class TFTableModelClassificationFlatModelMapperTest method test.
/**
 * Loads the bundled binary-classification model, runs the flat model mapper over 1000 rows
 * sampled from four base rows, and checks the output schema, the output row count, and that
 * the reserved columns {@code l} and {@code label} are passed through unchanged.
 *
 * @throws Exception if the plugin download, model loading or prediction fails
 */
@Category(DLTest.class)
@Test
public void test() throws Exception {
    AlinkGlobalConfiguration.setPrintProcessInfo(true);
    PluginDownloader pluginDownloader = AlinkGlobalConfiguration.getPluginDownloader();
    RegisterKey registerKey = TFPredictorClassLoaderFactory.getRegisterKey();
    pluginDownloader.downloadPlugin(registerKey.getName(), registerKey.getVersion());
    List<Row> baseData = Arrays.asList(Row.of((float) 1.2, 3.4, 10, 3L, "bad"), Row.of((float) 1.2, 3.4, 2, 5L, "good"), Row.of((float) 1.2, 3.4, 6, 8L, "bad"), Row.of((float) 1.2, 3.4, 3, 2L, "good"));
    String dataSchemaStr = "f float, d double, i int, l long, label string";
    // Fixed seed so that a failing run can be reproduced with the same input rows.
    Random random = new Random(0);
    List<Row> data = new ArrayList<>();
    for (int i = 0; i < 1000; i += 1) {
        data.add(baseData.get(random.nextInt(baseData.size())));
    }
    String modelPath = Files.createTempFile("tf_table_model_binary_class_model", ".ak").toString();
    File modelFile = new File(modelPath);
    // The temp copy of the model is only needed for the lifetime of this JVM.
    modelFile.deleteOnExit();
    // try-with-resources closes the stream on every path, including a failed assertion or copy.
    try (InputStream resourceAsStream = getClass().getClassLoader().getResourceAsStream("tf_table_model_binary_class_model.ak")) {
        // A JUnit assertion is used instead of the `assert` keyword, which is skipped without -ea.
        Assert.assertNotNull("Model resource tf_table_model_binary_class_model.ak not found on classpath", resourceAsStream);
        FileUtils.copyInputStreamToFile(resourceAsStream, modelFile);
    }
    BatchOperator<?> modelOp = new AkSourceBatchOp().setFilePath(modelPath);
    List<Row> modelRows = modelOp.collect();
    Params params = new Params();
    params.set(HasPredictionCol.PREDICTION_COL, "pred");
    params.set(HasPredictionDetailCol.PREDICTION_DETAIL_COL, "pred_detail");
    params.set(HasReservedColsDefaultAsNull.RESERVED_COLS, new String[] { "l", "label" });
    TFTableModelClassificationFlatModelMapper mapper = new TFTableModelClassificationFlatModelMapper(modelOp.getSchema(), CsvUtil.schemaStr2Schema(dataSchemaStr), params);
    mapper.loadModel(modelRows);
    List<Row> list = new ArrayList<>();
    ListCollector<Row> collector = new ListCollector<>(list);
    mapper.open();
    try {
        for (Row row : data) {
            mapper.flatMap(row, collector);
        }
    } finally {
        // Close the mapper even when flatMap throws.
        mapper.close();
    }
    Assert.assertEquals(TableSchema.builder().field("l", Types.LONG).field("label", Types.STRING).field("pred", Types.STRING).field("pred_detail", Types.STRING).build(), mapper.getOutputSchema());
    Assert.assertEquals(data.size(), list.size());
    for (int i = 0; i < data.size(); i += 1) {
        Assert.assertEquals(4, list.get(i).getArity());
        // Reserved columns: input field 3 ("l") and field 4 ("label") come first in the output.
        Assert.assertEquals(data.get(i).getField(3), list.get(i).getField(0));
        Assert.assertEquals(data.get(i).getField(4), list.get(i).getField(1));
    }
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class TFTableModelClassificationModelMapperTest method test.
/**
 * Loads the bundled binary-classification model, maps 1000 rows sampled from four base rows
 * one at a time, and checks the output schema and that the reserved columns {@code l} and
 * {@code label} are passed through unchanged.
 *
 * @throws Exception if the plugin download, model loading or prediction fails
 */
@Category(DLTest.class)
@Test
public void test() throws Exception {
    AlinkGlobalConfiguration.setPrintProcessInfo(true);
    PluginDownloader pluginDownloader = AlinkGlobalConfiguration.getPluginDownloader();
    RegisterKey registerKey = TFPredictorClassLoaderFactory.getRegisterKey();
    pluginDownloader.downloadPlugin(registerKey.getName(), registerKey.getVersion());
    List<Row> baseData = Arrays.asList(Row.of((float) 1.2, 3.4, 10, 3L, "bad"), Row.of((float) 1.2, 3.4, 2, 5L, "good"), Row.of((float) 1.2, 3.4, 6, 8L, "bad"), Row.of((float) 1.2, 3.4, 3, 2L, "good"));
    String dataSchemaStr = "f float, d double, i int, l long, label string";
    // Fixed seed so that a failing run can be reproduced with the same input rows.
    Random random = new Random(0);
    List<Row> data = new ArrayList<>();
    for (int i = 0; i < 1000; i += 1) {
        data.add(baseData.get(random.nextInt(baseData.size())));
    }
    String modelPath = Files.createTempFile("tf_table_model_binary_class_model", ".ak").toString();
    File modelFile = new File(modelPath);
    // The temp copy of the model is only needed for the lifetime of this JVM.
    modelFile.deleteOnExit();
    // try-with-resources closes the stream on every path, including a failed assertion or copy.
    try (InputStream resourceAsStream = getClass().getClassLoader().getResourceAsStream("tf_table_model_binary_class_model.ak")) {
        // A JUnit assertion is used instead of the `assert` keyword, which is skipped without -ea.
        Assert.assertNotNull("Model resource tf_table_model_binary_class_model.ak not found on classpath", resourceAsStream);
        FileUtils.copyInputStreamToFile(resourceAsStream, modelFile);
    }
    BatchOperator<?> modelOp = new AkSourceBatchOp().setFilePath(modelPath);
    List<Row> modelRows = modelOp.collect();
    Params params = new Params();
    params.set(HasPredictionCol.PREDICTION_COL, "pred");
    params.set(HasPredictionDetailCol.PREDICTION_DETAIL_COL, "pred_detail");
    params.set(HasReservedColsDefaultAsNull.RESERVED_COLS, new String[] { "l", "label" });
    TFTableModelClassificationModelMapper mapper = new TFTableModelClassificationModelMapper(modelOp.getSchema(), CsvUtil.schemaStr2Schema(dataSchemaStr), params);
    mapper.loadModel(modelRows);
    mapper.open();
    try {
        Assert.assertEquals(TableSchema.builder().field("l", Types.LONG).field("label", Types.STRING).field("pred", Types.STRING).field("pred_detail", Types.STRING).build(), mapper.getOutputSchema());
        for (Row row : data) {
            Row output = mapper.map(row);
            Assert.assertEquals(4, output.getArity());
            // Reserved columns: input field 3 ("l") and field 4 ("label") come first in the output.
            Assert.assertEquals(row.getField(3), output.getField(0));
            Assert.assertEquals(row.getField(4), output.getField(1));
        }
    } finally {
        // Close the mapper even when an assertion or map() call fails.
        mapper.close();
    }
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class TFTableModelRegressionModelMapperTest method test.
/**
 * Loads the bundled regression model, maps 1000 rows sampled from four base rows one at a
 * time, and checks the output schema and that the reserved columns {@code s} and
 * {@code label} are passed through unchanged.
 *
 * @throws Exception if the plugin download, model loading or prediction fails
 */
@Category(DLTest.class)
@Test
public void test() throws Exception {
    AlinkGlobalConfiguration.setPrintProcessInfo(true);
    PluginDownloader pluginDownloader = AlinkGlobalConfiguration.getPluginDownloader();
    RegisterKey registerKey = TFPredictorClassLoaderFactory.getRegisterKey();
    pluginDownloader.downloadPlugin(registerKey.getName(), registerKey.getVersion());
    List<Row> baseData = Arrays.asList(Row.of(1.2, 3.4, 10L, 3L, "yes", 0.), Row.of(1.2, 3.4, 2L, 5L, "no", 0.2), Row.of(1.2, 3.4, 6L, 8L, "no", 0.4), Row.of(1.2, 3.4, 3L, 2L, "yes", 1.0));
    String dataSchemaStr = "f double, d double, i long, l long, s string, label double";
    // Fixed seed so that a failing run can be reproduced with the same input rows.
    Random random = new Random(0);
    List<Row> data = new ArrayList<>();
    for (int i = 0; i < 1000; i += 1) {
        data.add(baseData.get(random.nextInt(baseData.size())));
    }
    String modelPath = Files.createTempFile("tf_table_model_regression_model", ".ak").toString();
    File modelFile = new File(modelPath);
    // The temp copy of the model is only needed for the lifetime of this JVM.
    modelFile.deleteOnExit();
    // try-with-resources closes the stream on every path, including a failed assertion or copy.
    try (InputStream resourceAsStream = getClass().getClassLoader().getResourceAsStream("tf_table_model_regression_model.ak")) {
        // A JUnit assertion is used instead of the `assert` keyword, which is skipped without -ea.
        Assert.assertNotNull("Model resource tf_table_model_regression_model.ak not found on classpath", resourceAsStream);
        FileUtils.copyInputStreamToFile(resourceAsStream, modelFile);
    }
    BatchOperator<?> modelOp = new AkSourceBatchOp().setFilePath(modelPath);
    List<Row> modelRows = modelOp.collect();
    Params params = new Params();
    params.set(HasPredictionCol.PREDICTION_COL, "pred");
    params.set(HasReservedColsDefaultAsNull.RESERVED_COLS, new String[] { "s", "label" });
    TFTableModelRegressionModelMapper mapper = new TFTableModelRegressionModelMapper(modelOp.getSchema(), CsvUtil.schemaStr2Schema(dataSchemaStr), params);
    mapper.loadModel(modelRows);
    mapper.open();
    try {
        Assert.assertEquals(TableSchema.builder().field("s", Types.STRING).field("label", Types.DOUBLE).field("pred", Types.DOUBLE).build(), mapper.getOutputSchema());
        for (Row row : data) {
            Row output = mapper.map(row);
            Assert.assertEquals(3, output.getArity());
            // Reserved columns: input field 4 ("s") and field 5 ("label") come first in the output.
            Assert.assertEquals(row.getField(4), output.getField(0));
            Assert.assertEquals(row.getField(5), output.getField(1));
        }
    } finally {
        // Close the mapper even when an assertion or map() call fails.
        mapper.close();
    }
}
use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
the class Chap21 method c_7.
/**
 * LDA topic-model walkthrough: segments the text column and removes stop words, trains and
 * saves an LDA model (plus its first side output) when no saved model file exists, then
 * predicts with the saved model, evaluates the result as a clustering, and prints the top
 * 20 words of each of the 10 topic columns from the saved side-output file.
 *
 * @throws Exception if any batch job fails
 */
private static void c_7() throws Exception {
    BatchOperator<?> docs = getSource()
        .select(LABEL_COL_NAME + ", " + TXT_COL_NAME)
        .link(new SegmentBatchOp().setSelectedCol(TXT_COL_NAME))
        .link(new StopWordsRemoverBatchOp().setSelectedCol(TXT_COL_NAME));
    docs.lazyPrint(10);

    // Train only when there is no previously saved model on disk.
    File savedModel = new File(DATA_DIR + LDA_MODEL_FILE);
    if (!savedModel.exists()) {
        LdaTrainBatchOp trainer = new LdaTrainBatchOp()
            .setTopicNum(10)
            .setNumIter(200)
            .setVocabSize(20000)
            .setSelectedCol(TXT_COL_NAME)
            .setRandomSeed(123);
        docs.link(trainer);
        trainer.lazyPrintModelInfo();

        // Main output: the model itself.
        AkSinkBatchOp modelSink = new AkSinkBatchOp().setFilePath(DATA_DIR + LDA_MODEL_FILE);
        trainer.link(modelSink);

        // Side output 0 is stored separately and read back below as the word/topic table.
        BatchOperator<?> sideOutput = trainer.getSideOutput(0);
        AkSinkBatchOp pwzSink = new AkSinkBatchOp().setFilePath(DATA_DIR + LDA_PWZ_FILE);
        sideOutput.link(pwzSink);

        BatchOperator.execute();
    }

    // Predict with the saved model, then evaluate the predictions against the label column.
    LdaPredictBatchOp predictor = new LdaPredictBatchOp()
        .setSelectedCol(TXT_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .setPredictionDetailCol("predinfo");
    AkSourceBatchOp modelSource = new AkSourceBatchOp().setFilePath(DATA_DIR + LDA_MODEL_FILE);
    BatchOperator<?> predictions = predictor.linkFrom(modelSource, docs).lazyPrint(5);
    BatchOperator<?> evaluator = new EvalClusterBatchOp()
        .setLabelCol(LABEL_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME)
        .lazyPrintMetrics();
    predictions.link(evaluator);

    AkSourceBatchOp pwz = new AkSourceBatchOp().setFilePath(DATA_DIR + LDA_PWZ_FILE);
    pwz.sample(0.001).lazyPrint(10);

    // One listing per topic column: top 20 rows ordered by that column, descending flag = false.
    for (int topic = 0; topic < 10; topic++) {
        String topicCol = "topic_" + topic;
        pwz.select("word, " + topicCol)
            .orderBy(topicCol, 20, false)
            .lazyPrint(-1, "topic" + topic);
    }

    BatchOperator.execute();
}
Aggregations