Example use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in the Alink project by Alibaba.
Taken from the class Chap05, method c_2_3.
/**
 * Demonstrates the SQL-style set operations available as batch operators:
 * UNION ALL, UNION, INTERSECT, INTERSECT ALL, MINUS and MINUS ALL.
 */
static void c_2_3() throws Exception {
    BatchOperator<?> users = Chap24.getSourceUsers();

    BatchOperator leftSet = users.filter("user_id<5");
    System.out.println("# users_1_4 #");
    leftSet.print();

    BatchOperator rightSet = users.filter("user_id>2 AND user_id<7");
    System.out.println("\n# users_3_6 #");
    rightSet.print();

    // UNION ALL keeps duplicate rows; UNION de-duplicates.
    new UnionAllBatchOp().linkFrom(leftSet, rightSet).print();
    new UnionBatchOp().linkFrom(leftSet, rightSet).print();

    // INTERSECT vs. INTERSECT ALL: the "ALL" variant is fed duplicated input
    // on purpose so multiplicity becomes visible in the output.
    new IntersectBatchOp().linkFrom(leftSet, rightSet).print();
    new IntersectAllBatchOp()
        .linkFrom(
            new UnionAllBatchOp().linkFrom(leftSet, leftSet),
            new UnionAllBatchOp().linkFrom(leftSet, rightSet))
        .print();

    // MINUS vs. MINUS ALL, again with duplicated input for the "ALL" variant.
    new MinusBatchOp().linkFrom(leftSet, rightSet).print();
    new MinusAllBatchOp()
        .linkFrom(
            new UnionAllBatchOp().linkFrom(leftSet, leftSet),
            new UnionAllBatchOp().linkFrom(leftSet, rightSet))
        .print();
}
Example use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in the Alink project by Alibaba.
Taken from the class PackBatchOperatorUtil, method packBatchOps.
/**
 * Packs an array of batch operators into a single {@code BatchOperator}.
 *
 * <p>The result is the UNION ALL of one meta row (describing the merged schema
 * and per-operator column indices) followed by each input operator re-packed
 * into the merged schema.
 *
 * @param batchOps the operators to pack; must be non-null and non-empty
 * @return a single operator containing the meta row and all packed inputs
 * @throws IllegalArgumentException if {@code batchOps} is null or empty
 */
public static BatchOperator packBatchOps(BatchOperator<?>[] batchOps) {
    if (batchOps == null || batchOps.length == 0) {
        // Invalid caller input, not an unexpected runtime failure; the more
        // specific IllegalArgumentException is still a RuntimeException, so
        // existing catch clauses continue to work.
        throw new IllegalArgumentException("batchOps must be set.");
    }
    // Merge all input schemas into one output schema, remembering for each
    // operator where its columns land in the merged schema.
    Tuple2<TableSchema, List<int[]>> mergeTypesAndIndices = mergeTypes(batchOps);
    TableSchema outSchema = mergeTypesAndIndices.f0;
    List<int[]> colIndices = mergeTypesAndIndices.f1;
    List<BatchOperator<?>> packedOps = new ArrayList<>();
    // The meta operator goes first so readers can recover the layout.
    packedOps.add(getPackMetaOp(batchOps, colIndices, outSchema));
    for (int i = 0; i < batchOps.length; i++) {
        packedOps.add(packBatchOp(batchOps[i], outSchema, i, colIndices.get(i)));
    }
    // Reuse the first operator's ML environment so the union runs in the
    // same session as its inputs.
    return new UnionAllBatchOp().setMLEnvironmentId(batchOps[0].getMLEnvironmentId()).linkFrom(packedOps);
}
Example use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in the Alink project by Alibaba.
Taken from the class AlsTrainBatchOpTest, method testPredict.
@Test
public void testPredict() {
    Long envId = MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID;
    BatchOperator<?> samples = new MemSourceBatchOp(rows1, new String[] { "uid", "iid", "label" }).setMLEnvironmentId(envId);
    BatchOperator<?> model = train();
    // Four flavors of ALS-based recommendation over the same trained model.
    AlsItemsPerUserRecommBatchOp predictor2 = new AlsItemsPerUserRecommBatchOp().setMLEnvironmentId(envId).setExcludeKnown(true).setUserCol("uid").setRecommCol("p");
    AlsUsersPerItemRecommBatchOp predictor3 = new AlsUsersPerItemRecommBatchOp().setMLEnvironmentId(envId).setItemCol("iid").setRecommCol("p");
    AlsSimilarUsersRecommBatchOp predictor4 = new AlsSimilarUsersRecommBatchOp().setMLEnvironmentId(envId).setUserCol("uid").setRecommCol("p");
    AlsSimilarItemsRecommBatchOp predictor5 = new AlsSimilarItemsRecommBatchOp().setMLEnvironmentId(envId).setItemCol("iid").setRecommCol("p");
    BatchOperator<?> result2 = predictor2.linkFrom(model, samples);
    BatchOperator<?> result3 = predictor3.linkFrom(model, samples);
    BatchOperator<?> result4 = predictor4.linkFrom(model, samples);
    BatchOperator<?> result5 = predictor5.linkFrom(model, samples);
    // Tag each result with its recommender type before unioning.
    result2 = result2.select("*, 'AlsItemsPerUserRecommBatchOp' as rec_type");
    result3 = result3.select("*, 'AlsUsersPerItemRecommBatchOp' as rec_type");
    result4 = result4.select("*, 'AlsSimilarUsersRecommBatchOp' as rec_type");
    result5 = result5.select("*, 'AlsSimilarItemsRecommBatchOp' as rec_type");
    int s = new UnionAllBatchOp().setMLEnvironmentId(envId).linkFrom(result2, result3, result4, result5).collect().size();
    // JUnit's assertEquals takes (expected, actual); the original call had the
    // arguments reversed, which produces a misleading failure message.
    Assert.assertEquals(24, s);
}
Example use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in the Alink project by Alibaba.
Taken from the class GraphEmbedding, method trans2Index.
/**
 * Transforms graph vertices into long indices for downstream embedding.
 *
 * <p>Returned operators:
 * <ul>
 *   <li>vocab, schema {NODE_COL:originalType, NODE_INDEX_COL:long}</li>
 *   <li>indexedGraph, schema {SOURCE_COL:long, TARGET_COL:long, WEIGHT_COL:double}</li>
 *   <li>indexWithType, only returned when {@code in2} is non-null,
 *       schema {NODE_INDEX_COL:long, NODE_TYPE_COL:string}</li>
 * </ul>
 *
 * @param in1 the graph edge data (source, target, optional weight columns)
 * @param in2 the vertex list with a vertex-type column; optional, may be null
 * @param params user-supplied parameters (source/target/weight/vertex/type column names)
 * @return two operators {vocab, indexedGraph}, or three {vocab, indexedGraph, indexWithType}
 *         when {@code in2} is provided
 */
public static BatchOperator[] trans2Index(BatchOperator in1, BatchOperator in2, Params params) {
String sourceColName = params.get(HasSourceCol.SOURCE_COL);
String targetColName = params.get(HasTargetCol.TARGET_COL);
String clause;
// Build the projection; when no weight column is configured, every edge
// gets a constant weight of 1.0.
if (params.contains(HasWeightCol.WEIGHT_COL)) {
String weightColName = params.get(HasWeightCol.WEIGHT_COL);
clause = "`" + sourceColName + "`, `" + targetColName + "`, `" + weightColName + "`";
} else {
clause = "`" + sourceColName + "`, `" + targetColName + "`, 1.0";
}
// Normalize column names to the internal SOURCE/TARGET/WEIGHT constants.
BatchOperator in = in1.select(clause).as(SOURCE_COL + ", " + TARGET_COL + ", " + WEIGHT_COL);
// Count how often each vertex appears across both endpoints of all edges
// (source and target columns are unioned before counting).
BatchOperator wordCnt = WordCountUtil.count(new UnionAllBatchOp().setMLEnvironmentId(in1.getMLEnvironmentId()).linkFrom(in.select(SOURCE_COL), in.select(TARGET_COL)).as(NODE_COL), NODE_COL);
// Assign each vocabulary entry a (randomized) long index.
BatchOperator vocab = WordCountUtil.randomIndexVocab(wordCnt, 0).select(WordCountUtil.WORD_COL_NAME + " AS " + NODE_COL + ", " + WordCountUtil.INDEX_COL_NAME + " AS " + NODE_INDEX_COL);
// Convert both the edge data and the vocab to DataSet<Tuple> so they can
// be joined with HackBatchOpJoin below.
DataSet<Tuple> inDataSet = in.getDataSet().map(new MapFunction<Row, Tuple3<Comparable, Comparable, Comparable>>() {

private static final long serialVersionUID = 8473819294214049730L;

@Override
public Tuple3<Comparable, Comparable, Comparable> map(Row value) throws Exception {
return Tuple3.of((Comparable) value.getField(0), (Comparable) value.getField(1), (Comparable) value.getField(2));
}
});
DataSet<Tuple2> vocabDataSet = vocab.getDataSet().map(new MapFunction<Row, Tuple2<Comparable, Long>>() {

private static final long serialVersionUID = 7241884458236714150L;

@Override
public Tuple2<Comparable, Long> map(Row value) throws Exception {
return Tuple2.of((Comparable) value.getField(0), (Long) value.getField(1));
}
});
// Two joins replace the original source and target vertex values with
// their vocab indices (join key positions and output projections are
// given by the int[][] position maps).
DataSet<Tuple> joinWithSourceColTuple = HackBatchOpJoin.join(inDataSet, vocabDataSet, 0, 0, new int[][] { { 1, 1 }, { 0, 1 }, { 0, 2 } });
DataSet<Tuple> indexGraphTuple = HackBatchOpJoin.join(joinWithSourceColTuple, vocabDataSet, 1, 0, new int[][] { { 0, 0 }, { 1, 1 }, { 0, 2 } });
// Wrap the indexed edge tuples back into a BatchOperator with the
// internal column names; source/target take the vocab index type, weight
// keeps the input's third column type.
TypeInformation<?>[] inTypes = in.getColTypes();
TypeInformation<?>[] vocabTypes = vocab.getColTypes();
BatchOperator indexedGraphBatchOp = new TableSourceBatchOp(DataSetConversionUtil.toTable(in.getMLEnvironmentId(), indexGraphTuple.map(new MapFunction<Tuple, Row>() {

private static final long serialVersionUID = -5386264086074581748L;

@Override
public Row map(Tuple value) throws Exception {
Row res = new Row(3);
res.setField(0, value.getField(0));
res.setField(1, value.getField(1));
res.setField(2, value.getField(2));
return res;
}
}), new String[] { SOURCE_COL, TARGET_COL, WEIGHT_COL }, new TypeInformation<?>[] { vocabTypes[1], vocabTypes[1], inTypes[2] }));
if (null == in2) {
return new BatchOperator[] { vocab, indexedGraphBatchOp };
} else {
// Also index the typed vertex list: select (vertex, type), join the
// vertex column against the vocab, and emit (index, type) rows.
BatchOperator in2Selected = in2.select("`" + params.get(HasVertexCol.VERTEX_COL) + "`, `" + params.get(HasTypeCol.TYPE_COL) + "`").as(TEMP_NODE_COL + ", " + NODE_TYPE_COL);
TypeInformation<?>[] types = new TypeInformation[2];
types[1] = in2.getColTypes()[TableUtil.findColIndex(in2.getSchema(), params.get(HasTypeCol.TYPE_COL))];
types[0] = vocab.getColTypes()[TableUtil.findColIndex(vocab.getSchema(), NODE_INDEX_COL)];
DataSet<Tuple> in2Tuple = in2Selected.getDataSet().map(new MapFunction<Row, Tuple2<Comparable, Comparable>>() {

private static final long serialVersionUID = 3459700988499538679L;

@Override
public Tuple2<Comparable, Comparable> map(Row value) throws Exception {
Tuple2<Comparable, Comparable> res = new Tuple2<>();
res.setField(value.getField(0), 0);
res.setField(value.getField(1), 1);
return res;
}
});
DataSet<Row> indexWithTypeRow = HackBatchOpJoin.join(in2Tuple, vocabDataSet, 0, 0, new int[][] { { 1, 1 }, { 0, 1 } }).map(new MapFunction<Tuple, Row>() {

private static final long serialVersionUID = -5747375637774394150L;

@Override
public Row map(Tuple value) throws Exception {
int length = value.getArity();
Row res = new Row(length);
for (int i = 0; i < length; i++) {
res.setField(i, value.getField(i));
}
return res;
}
});
BatchOperator indexWithType = new TableSourceBatchOp(DataSetConversionUtil.toTable(in.getMLEnvironmentId(), indexWithTypeRow, new String[] { NODE_INDEX_COL, NODE_TYPE_COL }, types)).setMLEnvironmentId(in.getMLEnvironmentId());
return new BatchOperator[] { vocab, indexedGraphBatchOp, indexWithType };
}
}
Example use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in the Alink project by Alibaba.
Taken from the class LegacyModelExporterUtils, method packPipelineStages.
/**
 * Packs a list of pipeline stages into a single BatchOperator.
 *
 * <p>The result starts with one meta row (id -1, serialized stage metadata)
 * and is extended, via UNION ALL, with the packed model data of each stage
 * that has any: nested PipelineModels and Pipelines are packed recursively,
 * and ModelBase stages contribute their (serialized) model data.
 *
 * @param stages the pipeline stages to pack; may be empty
 * @return a single operator carrying the meta row plus all stage model data
 */
@Deprecated
static BatchOperator<?> packPipelineStages(List<PipelineStageBase<?>> stages) {
int numStages = stages.size();
// Meta row uses id -1 so it sorts before the per-stage data rows.
Row row = Row.of(-1L, getMetaOfPipelineStages(stages));
// With no stages there is no environment to borrow, so fall back to the default.
BatchOperator<?> packed = new MemSourceBatchOp(Collections.singletonList(row), PIPELINE_MODEL_SCHEMA).setMLEnvironmentId(stages.size() > 0 ? stages.get(0).getMLEnvironmentId() : MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID);
for (int i = 0; i < numStages; i++) {
BatchOperator<?> data = null;
final long envId = stages.get(i).getMLEnvironmentId();
// Dispatch on the stage's concrete kind; note PipelineModel is checked
// before ModelBase, and stages matching none of the three contribute no data.
if (stages.get(i) instanceof PipelineModel) {
data = packTransformersArray(((PipelineModel) stages.get(i)).transformers);
} else if (stages.get(i) instanceof ModelBase) {
if (((ModelBase<?>) stages.get(i)).getModelData() != null) {
data = ((ModelBase<?>) stages.get(i)).getModelData().setMLEnvironmentId(envId);
// Serialize vector/tensor columns so the model data rows fit the packed schema.
data = data.link(new VectorSerializeBatchOp().setMLEnvironmentId(envId)).link(new TensorSerializeBatchOp().setMLEnvironmentId(envId));
}
} else if (stages.get(i) instanceof Pipeline) {
data = packPipelineStages(((Pipeline) stages.get(i)).stages);
}
if (data != null) {
// Append this stage's packed data (tagged with its index i) to the accumulator.
packed = new UnionAllBatchOp().setMLEnvironmentId(envId).linkFrom(packed, packBatchOp(data, i));
}
}
return packed;
}
Aggregations