Search in sources:

Example 1 with UnionAllBatchOp

use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in project Alink by alibaba.

the class Chap05 method c_2_3.

/**
 * Demonstrates the SQL set operators on two overlapping user subsets:
 * UNION ALL, UNION, INTERSECT, INTERSECT ALL, MINUS and MINUS ALL.
 */
static void c_2_3() throws Exception {
    BatchOperator<?> allUsers = Chap24.getSourceUsers();

    // Subset with user_id in [1, 4].
    BatchOperator lowIds = allUsers.filter("user_id<5");
    System.out.println("# users_1_4 #");
    lowIds.print();

    // Subset with user_id in [3, 6]; overlaps the first subset on ids 3 and 4.
    BatchOperator midIds = allUsers.filter("user_id>2 AND user_id<7");
    System.out.println("\n# users_3_6 #");
    midIds.print();

    // UNION ALL keeps duplicate rows; UNION removes them.
    new UnionAllBatchOp().linkFrom(lowIds, midIds).print();
    new UnionBatchOp().linkFrom(lowIds, midIds).print();

    // INTERSECT yields distinct common rows; INTERSECT ALL respects multiplicity,
    // demonstrated here by doubling the left input via UNION ALL.
    new IntersectBatchOp().linkFrom(lowIds, midIds).print();
    new IntersectAllBatchOp().linkFrom(new UnionAllBatchOp().linkFrom(lowIds, lowIds), new UnionAllBatchOp().linkFrom(lowIds, midIds)).print();

    // MINUS yields distinct left-only rows; MINUS ALL respects multiplicity.
    new MinusBatchOp().linkFrom(lowIds, midIds).print();
    new MinusAllBatchOp().linkFrom(new UnionAllBatchOp().linkFrom(lowIds, lowIds), new UnionAllBatchOp().linkFrom(lowIds, midIds)).print();
}
Also used : MinusBatchOp(com.alibaba.alink.operator.batch.sql.MinusBatchOp) IntersectAllBatchOp(com.alibaba.alink.operator.batch.sql.IntersectAllBatchOp) IntersectBatchOp(com.alibaba.alink.operator.batch.sql.IntersectBatchOp) UnionBatchOp(com.alibaba.alink.operator.batch.sql.UnionBatchOp) MinusAllBatchOp(com.alibaba.alink.operator.batch.sql.MinusAllBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) UnionAllBatchOp(com.alibaba.alink.operator.batch.sql.UnionAllBatchOp)

Example 2 with UnionAllBatchOp

use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in project Alink by alibaba.

the class PackBatchOperatorUtil method packBatchOps.

/**
 * Packs an array of batch operators into a single {@code BatchOperator}:
 * merges their schemas, prepends a meta operator describing the packing,
 * then unions the re-packed payload of each input operator.
 *
 * @param batchOps operators to pack; must be non-null and non-empty
 * @return a single operator containing the meta rows followed by each
 *         input's rows, mapped onto the merged schema
 * @throws IllegalArgumentException if {@code batchOps} is null or empty
 */
public static BatchOperator packBatchOps(BatchOperator<?>[] batchOps) {
    if (batchOps == null || batchOps.length == 0) {
        // IllegalArgumentException is a RuntimeException subtype, so callers
        // catching RuntimeException remain unaffected.
        throw new IllegalArgumentException("batchOps must be set.");
    }
    Tuple2<TableSchema, List<int[]>> mergeTypesAndIndices = mergeTypes(batchOps);
    TableSchema outSchema = mergeTypesAndIndices.f0;
    // Per-operator column index mapping into the merged schema.
    List<int[]> colIndices = mergeTypesAndIndices.f1;
    // One slot for the meta op plus one per input operator.
    List<BatchOperator<?>> packedOps = new ArrayList<>(batchOps.length + 1);
    packedOps.add(getPackMetaOp(batchOps, colIndices, outSchema));
    for (int i = 0; i < batchOps.length; i++) {
        packedOps.add(packBatchOp(batchOps[i], outSchema, i, colIndices.get(i)));
    }
    // All inputs share the first operator's ML environment.
    return new UnionAllBatchOp().setMLEnvironmentId(batchOps[0].getMLEnvironmentId()).linkFrom(packedOps);
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) UnionAllBatchOp(com.alibaba.alink.operator.batch.sql.UnionAllBatchOp)

Example 3 with UnionAllBatchOp

use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in project Alink by alibaba.

the class AlsTrainBatchOpTest method testPredict.

@Test
public void testPredict() {
    Long envId = MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID;
    BatchOperator<?> samples = new MemSourceBatchOp(rows1, new String[] { "uid", "iid", "label" }).setMLEnvironmentId(envId);
    BatchOperator<?> model = train();
    // Four recommendation flavors built from the same ALS model.
    AlsItemsPerUserRecommBatchOp predictor2 = new AlsItemsPerUserRecommBatchOp().setMLEnvironmentId(envId).setExcludeKnown(true).setUserCol("uid").setRecommCol("p");
    AlsUsersPerItemRecommBatchOp predictor3 = new AlsUsersPerItemRecommBatchOp().setMLEnvironmentId(envId).setItemCol("iid").setRecommCol("p");
    AlsSimilarUsersRecommBatchOp predictor4 = new AlsSimilarUsersRecommBatchOp().setMLEnvironmentId(envId).setUserCol("uid").setRecommCol("p");
    AlsSimilarItemsRecommBatchOp predictor5 = new AlsSimilarItemsRecommBatchOp().setMLEnvironmentId(envId).setItemCol("iid").setRecommCol("p");
    BatchOperator<?> result2 = predictor2.linkFrom(model, samples);
    BatchOperator<?> result3 = predictor3.linkFrom(model, samples);
    BatchOperator<?> result4 = predictor4.linkFrom(model, samples);
    BatchOperator<?> result5 = predictor5.linkFrom(model, samples);
    // Tag each result so the unioned rows remain distinguishable.
    result2 = result2.select("*, 'AlsItemsPerUserRecommBatchOp' as rec_type");
    result3 = result3.select("*, 'AlsUsersPerItemRecommBatchOp' as rec_type");
    result4 = result4.select("*, 'AlsSimilarUsersRecommBatchOp' as rec_type");
    result5 = result5.select("*, 'AlsSimilarItemsRecommBatchOp' as rec_type");
    int s = new UnionAllBatchOp().setMLEnvironmentId(envId).linkFrom(result2, result3, result4, result5).collect().size();
    // JUnit convention: assertEquals(expected, actual) — expected value first,
    // so failure messages report "expected 24 but was <s>" rather than the reverse.
    Assert.assertEquals(24, s);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) UnionAllBatchOp(com.alibaba.alink.operator.batch.sql.UnionAllBatchOp) Test(org.junit.Test)

Example 4 with UnionAllBatchOp

use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in project Alink by alibaba.

the class GraphEmbedding method trans2Index.

/**
 * Transform graph vertices into long indices.
 * <p>
 * Returned operators, in order:
 * vocab, schema {NODE_COL:originalType, NODE_INDEX_COL:long}
 * indexedGraph, schema {SOURCE_COL:long, TARGET_COL:long, WEIGHT_COL:double}
 * indexWithType, only returned if in2 is not null, schema {NODE_INDEX_COL:long, NODE_TYPE_COL:string}
 *
 * @param in1    is graph data
 * @param in2    is the vertexList with vertexType, optional
 * @param params user inputted parameters
 * @return {vocab, indexedGraph} when in2 is null, otherwise {vocab, indexedGraph, indexWithType}
 */
public static BatchOperator[] trans2Index(BatchOperator in1, BatchOperator in2, Params params) {
    String sourceColName = params.get(HasSourceCol.SOURCE_COL);
    String targetColName = params.get(HasTargetCol.TARGET_COL);
    String clause;
    // Build the projection clause; when no weight column is configured,
    // every edge gets a constant weight of 1.0.
    if (params.contains(HasWeightCol.WEIGHT_COL)) {
        String weightColName = params.get(HasWeightCol.WEIGHT_COL);
        clause = "`" + sourceColName + "`, `" + targetColName + "`, `" + weightColName + "`";
    } else {
        clause = "`" + sourceColName + "`, `" + targetColName + "`, 1.0";
    }
    // Normalize the edge list to the internal (SOURCE_COL, TARGET_COL, WEIGHT_COL) schema.
    BatchOperator in = in1.select(clause).as(SOURCE_COL + ", " + TARGET_COL + ", " + WEIGHT_COL);
    // count the times that all the words appear in the edges.
    BatchOperator wordCnt = WordCountUtil.count(new UnionAllBatchOp().setMLEnvironmentId(in1.getMLEnvironmentId()).linkFrom(in.select(SOURCE_COL), in.select(TARGET_COL)).as(NODE_COL), NODE_COL);
    // name each vocab with its index.
    BatchOperator vocab = WordCountUtil.randomIndexVocab(wordCnt, 0).select(WordCountUtil.WORD_COL_NAME + " AS " + NODE_COL + ", " + WordCountUtil.INDEX_COL_NAME + " AS " + NODE_INDEX_COL);
    // transform input and vocab to dataSet<Tuple>
    DataSet<Tuple> inDataSet = in.getDataSet().map(new MapFunction<Row, Tuple3<Comparable, Comparable, Comparable>>() {

        private static final long serialVersionUID = 8473819294214049730L;

        @Override
        public Tuple3<Comparable, Comparable, Comparable> map(Row value) throws Exception {
            return Tuple3.of((Comparable) value.getField(0), (Comparable) value.getField(1), (Comparable) value.getField(2));
        }
    });
    DataSet<Tuple2> vocabDataSet = vocab.getDataSet().map(new MapFunction<Row, Tuple2<Comparable, Long>>() {

        private static final long serialVersionUID = 7241884458236714150L;

        @Override
        public Tuple2<Comparable, Long> map(Row value) throws Exception {
            return Tuple2.of((Comparable) value.getField(0), (Long) value.getField(1));
        }
    });
    // join operation
    // First join replaces the source vertex with its index; the second replaces
    // the target vertex. The int[][] arguments select output fields positionally
    // — presumably {fromInput, fieldIndex} pairs; verify against HackBatchOpJoin.
    DataSet<Tuple> joinWithSourceColTuple = HackBatchOpJoin.join(inDataSet, vocabDataSet, 0, 0, new int[][] { { 1, 1 }, { 0, 1 }, { 0, 2 } });
    DataSet<Tuple> indexGraphTuple = HackBatchOpJoin.join(joinWithSourceColTuple, vocabDataSet, 1, 0, new int[][] { { 0, 0 }, { 1, 1 }, { 0, 2 } });
    // build batchOperator
    TypeInformation<?>[] inTypes = in.getColTypes();
    TypeInformation<?>[] vocabTypes = vocab.getColTypes();
    // Wrap the fully indexed edge tuples back into a BatchOperator with schema
    // {SOURCE_COL, TARGET_COL, WEIGHT_COL}; source/target take the vocab index
    // type, the weight keeps its original type.
    BatchOperator indexedGraphBatchOp = new TableSourceBatchOp(DataSetConversionUtil.toTable(in.getMLEnvironmentId(), indexGraphTuple.map(new MapFunction<Tuple, Row>() {

        private static final long serialVersionUID = -5386264086074581748L;

        @Override
        public Row map(Tuple value) throws Exception {
            Row res = new Row(3);
            res.setField(0, value.getField(0));
            res.setField(1, value.getField(1));
            res.setField(2, value.getField(2));
            return res;
        }
    }), new String[] { SOURCE_COL, TARGET_COL, WEIGHT_COL }, new TypeInformation<?>[] { vocabTypes[1], vocabTypes[1], inTypes[2] }));
    if (null == in2) {
        return new BatchOperator[] { vocab, indexedGraphBatchOp };
    } else {
        // Optional typed-vertex path: map each typed vertex to its index.
        BatchOperator in2Selected = in2.select("`" + params.get(HasVertexCol.VERTEX_COL) + "`, `" + params.get(HasTypeCol.TYPE_COL) + "`").as(TEMP_NODE_COL + ", " + NODE_TYPE_COL);
        TypeInformation<?>[] types = new TypeInformation[2];
        types[1] = in2.getColTypes()[TableUtil.findColIndex(in2.getSchema(), params.get(HasTypeCol.TYPE_COL))];
        types[0] = vocab.getColTypes()[TableUtil.findColIndex(vocab.getSchema(), NODE_INDEX_COL)];
        DataSet<Tuple> in2Tuple = in2Selected.getDataSet().map(new MapFunction<Row, Tuple2<Comparable, Comparable>>() {

            private static final long serialVersionUID = 3459700988499538679L;

            @Override
            public Tuple2<Comparable, Comparable> map(Row value) throws Exception {
                Tuple2<Comparable, Comparable> res = new Tuple2<>();
                res.setField(value.getField(0), 0);
                res.setField(value.getField(1), 1);
                return res;
            }
        });
        // Join typed vertices with the vocab to swap the vertex value for its index,
        // then flatten each tuple back into a Row.
        DataSet<Row> indexWithTypeRow = HackBatchOpJoin.join(in2Tuple, vocabDataSet, 0, 0, new int[][] { { 1, 1 }, { 0, 1 } }).map(new MapFunction<Tuple, Row>() {

            private static final long serialVersionUID = -5747375637774394150L;

            @Override
            public Row map(Tuple value) throws Exception {
                int length = value.getArity();
                Row res = new Row(length);
                for (int i = 0; i < length; i++) {
                    res.setField(i, value.getField(i));
                }
                return res;
            }
        });
        BatchOperator indexWithType = new TableSourceBatchOp(DataSetConversionUtil.toTable(in.getMLEnvironmentId(), indexWithTypeRow, new String[] { NODE_INDEX_COL, NODE_TYPE_COL }, types)).setMLEnvironmentId(in.getMLEnvironmentId());
        return new BatchOperator[] { vocab, indexedGraphBatchOp, indexWithType };
    }
}
Also used : TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) UnionAllBatchOp(com.alibaba.alink.operator.batch.sql.UnionAllBatchOp) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Row(org.apache.flink.types.Row) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 5 with UnionAllBatchOp

use of com.alibaba.alink.operator.batch.sql.UnionAllBatchOp in project Alink by alibaba.

the class LegacyModelExporterUtils method packPipelineStages.

/**
 * Pack an array of pipeline stages into a single {@code BatchOperator}:
 * a meta row (id -1) describing all stages, union-ed with the packed model
 * data of each stage that has any.
 *
 * @param stages pipeline stages to pack; may be empty
 * @return operator holding the meta row followed by each stage's packed data
 */
@Deprecated
static BatchOperator<?> packPipelineStages(List<PipelineStageBase<?>> stages) {
    int numStages = stages.size();
    // Meta row with sentinel id -1 carries the serialized stage descriptions.
    Row row = Row.of(-1L, getMetaOfPipelineStages(stages));
    BatchOperator<?> packed = new MemSourceBatchOp(Collections.singletonList(row), PIPELINE_MODEL_SCHEMA).setMLEnvironmentId(!stages.isEmpty() ? stages.get(0).getMLEnvironmentId() : MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID);
    for (int i = 0; i < numStages; i++) {
        // Hoist the repeated stages.get(i) lookups into a single local.
        final PipelineStageBase<?> stage = stages.get(i);
        final long envId = stage.getMLEnvironmentId();
        BatchOperator<?> data = null;
        if (stage instanceof PipelineModel) {
            // Nested PipelineModel: pack its transformers recursively.
            data = packTransformersArray(((PipelineModel) stage).transformers);
        } else if (stage instanceof ModelBase) {
            if (((ModelBase<?>) stage).getModelData() != null) {
                data = ((ModelBase<?>) stage).getModelData().setMLEnvironmentId(envId);
                // Serialize vector/tensor columns so the model data can be
                // stored as plain rows.
                data = data.link(new VectorSerializeBatchOp().setMLEnvironmentId(envId)).link(new TensorSerializeBatchOp().setMLEnvironmentId(envId));
            }
        } else if (stage instanceof Pipeline) {
            // Nested Pipeline: recurse on its stage list.
            data = packPipelineStages(((Pipeline) stage).stages);
        }
        if (data != null) {
            // Stages without model data (plain transformers) contribute no rows.
            packed = new UnionAllBatchOp().setMLEnvironmentId(envId).linkFrom(packed, packBatchOp(data, i));
        }
    }
    return packed;
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) TensorSerializeBatchOp(com.alibaba.alink.operator.batch.utils.TensorSerializeBatchOp) Row(org.apache.flink.types.Row) VectorSerializeBatchOp(com.alibaba.alink.operator.batch.utils.VectorSerializeBatchOp) UnionAllBatchOp(com.alibaba.alink.operator.batch.sql.UnionAllBatchOp)

Aggregations

UnionAllBatchOp (com.alibaba.alink.operator.batch.sql.UnionAllBatchOp)6 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)3 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)2 Row (org.apache.flink.types.Row)2 TableSourceBatchOp (com.alibaba.alink.operator.batch.source.TableSourceBatchOp)1 IntersectAllBatchOp (com.alibaba.alink.operator.batch.sql.IntersectAllBatchOp)1 IntersectBatchOp (com.alibaba.alink.operator.batch.sql.IntersectBatchOp)1 MinusAllBatchOp (com.alibaba.alink.operator.batch.sql.MinusAllBatchOp)1 MinusBatchOp (com.alibaba.alink.operator.batch.sql.MinusBatchOp)1 UnionBatchOp (com.alibaba.alink.operator.batch.sql.UnionBatchOp)1 TensorSerializeBatchOp (com.alibaba.alink.operator.batch.utils.TensorSerializeBatchOp)1 VectorSerializeBatchOp (com.alibaba.alink.operator.batch.utils.VectorSerializeBatchOp)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)1 Tuple (org.apache.flink.api.java.tuple.Tuple)1 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)1 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)1 TableSchema (org.apache.flink.table.api.TableSchema)1 Test (org.junit.Test)1