
Example 11 with Seq

use of scala.collection.Seq in project flink by apache.

the class PushPartitionIntoTableSourceScanRule method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    Filter filter = call.rel(0);
    LogicalTableScan scan = call.rel(1);
    TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
    RelDataType inputFieldTypes = filter.getInput().getRowType();
    List<String> inputFieldNames = inputFieldTypes.getFieldNames();
    List<String> partitionFieldNames =
            tableSourceTable
                    .contextResolvedTable()
                    .<ResolvedCatalogTable>getResolvedTable()
                    .getPartitionKeys();
    // extract partition predicates
    RelBuilder relBuilder = call.builder();
    RexBuilder rexBuilder = relBuilder.getRexBuilder();
    Tuple2<Seq<RexNode>, Seq<RexNode>> allPredicates =
            RexNodeExtractor.extractPartitionPredicateList(
                    filter.getCondition(),
                    FlinkRelOptUtil.getMaxCnfNodeCount(scan),
                    inputFieldNames.toArray(new String[0]),
                    rexBuilder,
                    partitionFieldNames.toArray(new String[0]));
    RexNode partitionPredicate =
            RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._1()));
    if (partitionPredicate.isAlwaysTrue()) {
        return;
    }
    // build pruner
    LogicalType[] partitionFieldTypes = partitionFieldNames.stream().map(name -> {
        int index = inputFieldNames.indexOf(name);
        if (index < 0) {
            throw new TableException(String.format("Partitioned key '%s' isn't found in input columns. " + "Validator should have checked that.", name));
        }
        return inputFieldTypes.getFieldList().get(index).getType();
    }).map(FlinkTypeFactory::toLogicalType).toArray(LogicalType[]::new);
    RexNode finalPartitionPredicate = adjustPartitionPredicate(inputFieldNames, partitionFieldNames, partitionPredicate);
    FlinkContext context = ShortcutUtils.unwrapContext(scan);
    Function<List<Map<String, String>>, List<Map<String, String>>> defaultPruner =
            partitions ->
                    PartitionPruner.prunePartitions(
                            context.getTableConfig(),
                            partitionFieldNames.toArray(new String[0]),
                            partitionFieldTypes,
                            partitions,
                            finalPartitionPredicate);
    // prune partitions
    List<Map<String, String>> remainingPartitions =
            readPartitionsAndPrune(
                    rexBuilder, context, tableSourceTable, defaultPruner, allPredicates._1(), inputFieldNames);
    // apply push down
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource().copy();
    PartitionPushDownSpec partitionPushDownSpec = new PartitionPushDownSpec(remainingPartitions);
    partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan));
    // build new statistic
    TableStats newTableStat = null;
    if (tableSourceTable.contextResolvedTable().isPermanent()) {
        ObjectIdentifier identifier = tableSourceTable.contextResolvedTable().getIdentifier();
        ObjectPath tablePath = identifier.toObjectPath();
        Catalog catalog = tableSourceTable.contextResolvedTable().getCatalog().get();
        for (Map<String, String> partition : remainingPartitions) {
            Optional<TableStats> partitionStats = getPartitionStats(catalog, tablePath, partition);
            if (!partitionStats.isPresent()) {
                // a partition without statistics invalidates the merged result; discard everything collected so far
                newTableStat = null;
                break;
            } else {
                newTableStat = newTableStat == null ? partitionStats.get() : newTableStat.merge(partitionStats.get());
            }
        }
    }
    FlinkStatistic newStatistic =
            FlinkStatistic.builder()
                    .statistic(tableSourceTable.getStatistic())
                    .tableStats(newTableStat)
                    .build();
    TableSourceTable newTableSourceTable = tableSourceTable.copy(dynamicTableSource, newStatistic, new SourceAbilitySpec[] { partitionPushDownSpec });
    LogicalTableScan newScan = LogicalTableScan.create(scan.getCluster(), newTableSourceTable, scan.getHints());
    // transform to new node
    RexNode nonPartitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._2()));
    if (nonPartitionPredicate.isAlwaysTrue()) {
        call.transformTo(newScan);
    } else {
        Filter newFilter = filter.copy(filter.getTraitSet(), newScan, nonPartitionPredicate);
        call.transformTo(newFilter);
    }
}
Also used : CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics) Arrays(java.util.Arrays) SourceAbilityContext(org.apache.flink.table.planner.plan.abilities.source.SourceAbilityContext) PartitionNotExistException(org.apache.flink.table.catalog.exceptions.PartitionNotExistException) CatalogTable(org.apache.flink.table.catalog.CatalogTable) ShortcutUtils(org.apache.flink.table.planner.utils.ShortcutUtils) SupportsPartitionPushDown(org.apache.flink.table.connector.source.abilities.SupportsPartitionPushDown) FlinkTypeFactory(org.apache.flink.table.planner.calcite.FlinkTypeFactory) RexUtil(org.apache.calcite.rex.RexUtil) RexNode(org.apache.calcite.rex.RexNode) RelBuilder(org.apache.calcite.tools.RelBuilder) ResolvedExpression(org.apache.flink.table.expressions.ResolvedExpression) Map(java.util.Map) TableStats(org.apache.flink.table.plan.stats.TableStats) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) PartitionPushDownSpec(org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec) TimeZone(java.util.TimeZone) Seq(scala.collection.Seq) FlinkContext(org.apache.flink.table.planner.calcite.FlinkContext) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) SourceAbilitySpec(org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec) RexInputRef(org.apache.calcite.rex.RexInputRef) List(java.util.List) TableNotPartitionedException(org.apache.flink.table.catalog.exceptions.TableNotPartitionedException) LogicalType(org.apache.flink.table.types.logical.LogicalType) Optional(java.util.Optional) RexNodeToExpressionConverter(org.apache.flink.table.planner.plan.utils.RexNodeToExpressionConverter) LogicalTableScan(org.apache.calcite.rel.logical.LogicalTableScan) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) CatalogTableStatisticsConverter(org.apache.flink.table.planner.utils.CatalogTableStatisticsConverter) RexNodeExtractor(org.apache.flink.table.planner.plan.utils.RexNodeExtractor) Expression(org.apache.flink.table.expressions.Expression) Filter(org.apache.calcite.rel.core.Filter) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Function(java.util.function.Function) ArrayList(java.util.ArrayList) CatalogTableStatistics(org.apache.flink.table.catalog.stats.CatalogTableStatistics) Catalog(org.apache.flink.table.catalog.Catalog) PartitionPruner(org.apache.flink.table.planner.plan.utils.PartitionPruner) ResolvedCatalogTable(org.apache.flink.table.catalog.ResolvedCatalogTable) RelDataType(org.apache.calcite.rel.type.RelDataType) JavaConversions(scala.collection.JavaConversions) TableNotExistException(org.apache.flink.table.catalog.exceptions.TableNotExistException) RexBuilder(org.apache.calcite.rex.RexBuilder) TableException(org.apache.flink.table.api.TableException) Option(scala.Option) FlinkRelOptUtil(org.apache.flink.table.planner.plan.utils.FlinkRelOptUtil) TableSourceTable(org.apache.flink.table.planner.plan.schema.TableSourceTable) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) RelOptRule(org.apache.calcite.plan.RelOptRule) FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic) RexShuttle(org.apache.calcite.rex.RexShuttle) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException)
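
A note on the interop pattern: the rule crosses the Java/Scala boundary twice, taking a scala.Tuple2 of scala.collection.Seq values back from RexNodeExtractor and converting them with JavaConversions.seqAsJavaList before handing them to Calcite. A minimal, self-contained sketch of that round trip, assuming Scala 2.11/2.12 on the classpath (JavaConversions is deprecated there and removed in Scala 2.13; the class name below is illustrative):

import java.util.Arrays;
import java.util.List;
import scala.collection.JavaConversions;
import scala.collection.Seq;

public class SeqRoundTripSketch {
    public static void main(String[] args) {
        // Java -> Scala: wrap a Java List as a Scala Buffer (a Seq subtype).
        List<String> javaList = Arrays.asList("a", "b", "c");
        Seq<String> scalaSeq = JavaConversions.asScalaBuffer(javaList);
        // Scala -> Java: view the Seq as a java.util.List again, as the rule does
        // with allPredicates._1() before calling RexUtil.composeConjunction.
        List<String> backAgain = JavaConversions.seqAsJavaList(scalaSeq);
        System.out.println(backAgain); // prints [a, b, c]
    }
}

Both conversions are wrappers rather than copies, so the round trip is cheap.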

Example 12 with Seq

use of scala.collection.Seq in project kafka by apache.

the class TopicBasedRemoteLogMetadataManagerTest method testNewPartitionUpdates.

@Test
public void testNewPartitionUpdates() throws Exception {
    // Create topics.
    String leaderTopic = "new-leader";
    HashMap<Object, Seq<Object>> assignedLeaderTopicReplicas = new HashMap<>();
    List<Object> leaderTopicReplicas = new ArrayList<>();
    // Set broker id 0 as the first entry, which is taken as the leader.
    leaderTopicReplicas.add(0);
    leaderTopicReplicas.add(1);
    leaderTopicReplicas.add(2);
    assignedLeaderTopicReplicas.put(0, JavaConverters.asScalaBuffer(leaderTopicReplicas));
    remoteLogMetadataManagerHarness.createTopicWithAssignment(
            leaderTopic,
            JavaConverters.mapAsScalaMap(assignedLeaderTopicReplicas),
            remoteLogMetadataManagerHarness.listenerName());
    String followerTopic = "new-follower";
    HashMap<Object, Seq<Object>> assignedFollowerTopicReplicas = new HashMap<>();
    List<Object> followerTopicReplicas = new ArrayList<>();
    // Set broker id 1 as the first entry, which is taken as the leader.
    followerTopicReplicas.add(1);
    followerTopicReplicas.add(2);
    followerTopicReplicas.add(0);
    assignedFollowerTopicReplicas.put(0, JavaConverters.asScalaBuffer(followerTopicReplicas));
    remoteLogMetadataManagerHarness.createTopicWithAssignment(
            followerTopic,
            JavaConverters.mapAsScalaMap(assignedFollowerTopicReplicas),
            remoteLogMetadataManagerHarness.listenerName());
    final TopicIdPartition newLeaderTopicIdPartition = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition(leaderTopic, 0));
    final TopicIdPartition newFollowerTopicIdPartition = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition(followerTopic, 0));
    // Add segments for these partitions; the calls fail with an exception because the partitions
    // have not yet been subscribed. The messages would have been published to the respective
    // metadata topic partitions, but the ConsumerManager is not yet subscribed to them because
    // the partitions are not yet registered.
    RemoteLogSegmentMetadata leaderSegmentMetadata =
            new RemoteLogSegmentMetadata(
                    new RemoteLogSegmentId(newLeaderTopicIdPartition, Uuid.randomUuid()),
                    0, 100, -1L, 0, time.milliseconds(), SEG_SIZE,
                    Collections.singletonMap(0, 0L));
    Assertions.assertThrows(Exception.class, () -> topicBasedRlmm().addRemoteLogSegmentMetadata(leaderSegmentMetadata).get());
    RemoteLogSegmentMetadata followerSegmentMetadata =
            new RemoteLogSegmentMetadata(
                    new RemoteLogSegmentId(newFollowerTopicIdPartition, Uuid.randomUuid()),
                    0, 100, -1L, 0, time.milliseconds(), SEG_SIZE,
                    Collections.singletonMap(0, 0L));
    Assertions.assertThrows(Exception.class, () -> topicBasedRlmm().addRemoteLogSegmentMetadata(followerSegmentMetadata).get());
    // `listRemoteLogSegments` throws an exception because these topic partitions are not yet registered.
    Assertions.assertThrows(RemoteResourceNotFoundException.class, () -> topicBasedRlmm().listRemoteLogSegments(newLeaderTopicIdPartition));
    Assertions.assertThrows(RemoteResourceNotFoundException.class, () -> topicBasedRlmm().listRemoteLogSegments(newFollowerTopicIdPartition));
    topicBasedRlmm().onPartitionLeadershipChanges(Collections.singleton(newLeaderTopicIdPartition), Collections.singleton(newFollowerTopicIdPartition));
    // The RemoteLogSegmentMetadata events have already been published; topicBasedRlmm's consumer
    // manager now starts fetching those events and building the cache.
    waitUntilConsumerCatchesup(newLeaderTopicIdPartition, newFollowerTopicIdPartition, 30_000L);
    Assertions.assertTrue(topicBasedRlmm().listRemoteLogSegments(newLeaderTopicIdPartition).hasNext());
    Assertions.assertTrue(topicBasedRlmm().listRemoteLogSegments(newFollowerTopicIdPartition).hasNext());
}
Also used : HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) ArrayList(java.util.ArrayList) TopicIdPartition(org.apache.kafka.common.TopicIdPartition) RemoteLogSegmentId(org.apache.kafka.server.log.remote.storage.RemoteLogSegmentId) Seq(scala.collection.Seq) RemoteLogSegmentMetadata(org.apache.kafka.server.log.remote.storage.RemoteLogSegmentMetadata) Test(org.junit.jupiter.api.Test)
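
The two assignment maps encode Kafka's replica-assignment convention: a partition id mapped to an ordered Seq of broker ids, where the first entry is the preferred leader. A stripped-down sketch of just the conversion step (the class name and map contents are illustrative; createTopicWithAssignment itself belongs to Kafka's test harness):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import scala.collection.JavaConverters;
import scala.collection.Seq;

public class AssignmentSketch {
    public static void main(String[] args) {
        // Partition 0 -> replicas [1, 2, 0]; broker 1 is the preferred leader.
        List<Object> replicas = new ArrayList<>();
        replicas.add(1);
        replicas.add(2);
        replicas.add(0);
        HashMap<Object, Seq<Object>> assignment = new HashMap<>();
        assignment.put(0, JavaConverters.asScalaBuffer(replicas));
        // The Scala-side API expects a scala.collection.Map, hence a second conversion.
        scala.collection.Map<Object, Seq<Object>> scalaAssignment =
                JavaConverters.mapAsScalaMap(assignment);
        System.out.println(scalaAssignment);
    }
}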

Example 13 with Seq

use of scala.collection.Seq in project incubator-systemml by apache.

the class MLContextTest method testInputTupleSeqNoMetadataDML.

@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqNoMetadataDML() {
    System.out.println("MLContextTest - Tuple sequence no metadata DML");
    List<String> list1 = new ArrayList<String>();
    list1.add("1,2");
    list1.add("3,4");
    JavaRDD<String> javaRDD1 = sc.parallelize(list1);
    RDD<String> rdd1 = JavaRDD.toRDD(javaRDD1);
    List<String> list2 = new ArrayList<String>();
    list2.add("5,6");
    list2.add("7,8");
    JavaRDD<String> javaRDD2 = sc.parallelize(list2);
    RDD<String> rdd2 = JavaRDD.toRDD(javaRDD2);
    Tuple2 tuple1 = new Tuple2("m1", rdd1);
    Tuple2 tuple2 = new Tuple2("m2", rdd2);
    List tupleList = new ArrayList();
    tupleList.add(tuple1);
    tupleList.add(tuple2);
    Seq seq = JavaConversions.asScalaBuffer(tupleList).toSeq();
    Script script = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(seq);
    setExpectedStdOut("sums: 10.0 26.0");
    ml.execute(script);
}
Also used : Script(org.apache.sysml.api.mlcontext.Script) Tuple2(scala.Tuple2) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Seq(scala.collection.Seq) Test(org.junit.Test)
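
The conversion at the heart of this test also works with generics, which avoids the rawtypes suppression. A minimal sketch with placeholder Integer payloads standing in for the RDDs (the class name is illustrative):

import java.util.ArrayList;
import java.util.List;
import scala.Tuple2;
import scala.collection.JavaConversions;
import scala.collection.Seq;

public class TupleSeqSketch {
    public static void main(String[] args) {
        // Named bindings as (name, value) pairs.
        List<Tuple2<String, Integer>> bindings = new ArrayList<>();
        bindings.add(new Tuple2<>("m1", 1));
        bindings.add(new Tuple2<>("m2", 2));
        // Same List -> Buffer -> Seq conversion the test uses, fully typed.
        Seq<Tuple2<String, Integer>> seq = JavaConversions.asScalaBuffer(bindings).toSeq();
        System.out.println(seq);
    }
}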

Example 14 with Seq

use of scala.collection.Seq in project incubator-systemml by apache.

the class MLContextTest method testInputTupleSeqWithMetadataPYDML.

@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqWithMetadataPYDML() {
    System.out.println("MLContextTest - Tuple sequence with metadata PYDML");
    List<String> list1 = new ArrayList<String>();
    list1.add("1,2");
    list1.add("3,4");
    JavaRDD<String> javaRDD1 = sc.parallelize(list1);
    RDD<String> rdd1 = JavaRDD.toRDD(javaRDD1);
    List<String> list2 = new ArrayList<String>();
    list2.add("5,6");
    list2.add("7,8");
    JavaRDD<String> javaRDD2 = sc.parallelize(list2);
    RDD<String> rdd2 = JavaRDD.toRDD(javaRDD2);
    MatrixMetadata mm1 = new MatrixMetadata(2, 2);
    MatrixMetadata mm2 = new MatrixMetadata(2, 2);
    Tuple3 tuple1 = new Tuple3("m1", rdd1, mm1);
    Tuple3 tuple2 = new Tuple3("m2", rdd2, mm2);
    List tupleList = new ArrayList();
    tupleList.add(tuple1);
    tupleList.add(tuple2);
    Seq seq = JavaConversions.asScalaBuffer(tupleList).toSeq();
    Script script = pydml("print('sums: ' + sum(m1) + ' ' + sum(m2))").in(seq);
    setExpectedStdOut("sums: 10.0 26.0");
    ml.execute(script);
}
Also used : Script(org.apache.sysml.api.mlcontext.Script) Tuple3(scala.Tuple3) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) MatrixMetadata(org.apache.sysml.api.mlcontext.MatrixMetadata) Seq(scala.collection.Seq) Test(org.junit.Test)

Example 15 with Seq

use of scala.collection.Seq in project incubator-systemml by apache.

the class MLContextTest method testOutputScalaSeqDML.

@SuppressWarnings({ "unchecked", "rawtypes" })
@Test
public void testOutputScalaSeqDML() {
    System.out.println("MLContextTest - output specified as Scala Seq DML");
    List outputs = Arrays.asList("x", "y");
    Seq seq = JavaConversions.asScalaBuffer(outputs).toSeq();
    Script script = dml("a=1;x=a+1;y=x+1").out(seq);
    MLResults results = ml.execute(script);
    Assert.assertEquals(2, results.getLong("x"));
    Assert.assertEquals(3, results.getLong("y"));
}
Also used : Script(org.apache.sysml.api.mlcontext.Script) MLResults(org.apache.sysml.api.mlcontext.MLResults) List(java.util.List) ArrayList(java.util.ArrayList) Seq(scala.collection.Seq) Test(org.junit.Test)
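
All of the MLContext examples above use scala.collection.JavaConversions, which is deprecated and was removed in Scala 2.13. A sketch of the same output-name conversion via the JavaConverters API, assuming Scala 2.12 (on 2.13, scala.jdk.CollectionConverters is the replacement):

import java.util.Arrays;
import java.util.List;
import scala.collection.JavaConverters;
import scala.collection.Seq;

public class OutputSeqSketch {
    public static void main(String[] args) {
        List<String> outputs = Arrays.asList("x", "y");
        // asScalaBuffer wraps the Java list without copying; toSeq exposes it as a Seq.
        Seq<String> seq = JavaConverters.asScalaBuffer(outputs).toSeq();
        System.out.println(seq); // prints the wrapped sequence, e.g. Buffer(x, y)
    }
}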

Aggregations

Seq (scala.collection.Seq): 20
ArrayList (java.util.ArrayList): 18
List (java.util.List): 14
Script (org.apache.sysml.api.mlcontext.Script): 12
Test (org.junit.Test): 12
Tuple2 (scala.Tuple2): 6
Tuple3 (scala.Tuple3): 5
MLResults (org.apache.sysml.api.mlcontext.MLResults): 4
MatrixMetadata (org.apache.sysml.api.mlcontext.MatrixMetadata): 4
AggregateCall (org.apache.calcite.rel.core.AggregateCall): 3
HashMap (java.util.HashMap): 2
RelTraitSet (org.apache.calcite.plan.RelTraitSet): 2
RelCollation (org.apache.calcite.rel.RelCollation): 2
RelNode (org.apache.calcite.rel.RelNode): 2
RexInputRef (org.apache.calcite.rex.RexInputRef): 2
RexNode (org.apache.calcite.rex.RexNode): 2
UserDefinedFunction (org.apache.flink.table.functions.UserDefinedFunction): 2
FlinkRelDistribution (org.apache.flink.table.planner.plan.trait.FlinkRelDistribution): 2
DataType (org.apache.flink.table.types.DataType): 2
Option (scala.Option): 2