Use of scala.collection.Seq in project flink by apache.
The class PushPartitionIntoTableSourceScanRule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
    Filter filter = call.rel(0);
    LogicalTableScan scan = call.rel(1);
    TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
    RelDataType inputFieldTypes = filter.getInput().getRowType();
    List<String> inputFieldNames = inputFieldTypes.getFieldNames();
    List<String> partitionFieldNames =
            tableSourceTable
                    .contextResolvedTable()
                    .<ResolvedCatalogTable>getResolvedTable()
                    .getPartitionKeys();

    // extract partition predicates
    RelBuilder relBuilder = call.builder();
    RexBuilder rexBuilder = relBuilder.getRexBuilder();
    Tuple2<Seq<RexNode>, Seq<RexNode>> allPredicates =
            RexNodeExtractor.extractPartitionPredicateList(
                    filter.getCondition(),
                    FlinkRelOptUtil.getMaxCnfNodeCount(scan),
                    inputFieldNames.toArray(new String[0]),
                    rexBuilder,
                    partitionFieldNames.toArray(new String[0]));
    RexNode partitionPredicate =
            RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._1()));
    if (partitionPredicate.isAlwaysTrue()) {
        return;
    }

    // build pruner
    LogicalType[] partitionFieldTypes =
            partitionFieldNames.stream()
                    .map(name -> {
                        int index = inputFieldNames.indexOf(name);
                        if (index < 0) {
                            throw new TableException(
                                    String.format(
                                            "Partitioned key '%s' isn't found in input columns. "
                                                    + "Validator should have checked that.",
                                            name));
                        }
                        return inputFieldTypes.getFieldList().get(index).getType();
                    })
                    .map(FlinkTypeFactory::toLogicalType)
                    .toArray(LogicalType[]::new);
    RexNode finalPartitionPredicate =
            adjustPartitionPredicate(inputFieldNames, partitionFieldNames, partitionPredicate);
    FlinkContext context = ShortcutUtils.unwrapContext(scan);
    Function<List<Map<String, String>>, List<Map<String, String>>> defaultPruner =
            partitions ->
                    PartitionPruner.prunePartitions(
                            context.getTableConfig(),
                            partitionFieldNames.toArray(new String[0]),
                            partitionFieldTypes,
                            partitions,
                            finalPartitionPredicate);

    // prune partitions
    List<Map<String, String>> remainingPartitions =
            readPartitionsAndPrune(
                    rexBuilder, context, tableSourceTable, defaultPruner, allPredicates._1(), inputFieldNames);

    // apply push down
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource().copy();
    PartitionPushDownSpec partitionPushDownSpec = new PartitionPushDownSpec(remainingPartitions);
    partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan));

    // build new statistic
    TableStats newTableStat = null;
    if (tableSourceTable.contextResolvedTable().isPermanent()) {
        ObjectIdentifier identifier = tableSourceTable.contextResolvedTable().getIdentifier();
        ObjectPath tablePath = identifier.toObjectPath();
        Catalog catalog = tableSourceTable.contextResolvedTable().getCatalog().get();
        for (Map<String, String> partition : remainingPartitions) {
            Optional<TableStats> partitionStats = getPartitionStats(catalog, tablePath, partition);
            if (!partitionStats.isPresent()) {
                // clear all information gathered so far
                newTableStat = null;
                break;
            } else {
                newTableStat =
                        newTableStat == null
                                ? partitionStats.get()
                                : newTableStat.merge(partitionStats.get());
            }
        }
    }
    FlinkStatistic newStatistic =
            FlinkStatistic.builder()
                    .statistic(tableSourceTable.getStatistic())
                    .tableStats(newTableStat)
                    .build();
    TableSourceTable newTableSourceTable =
            tableSourceTable.copy(
                    dynamicTableSource,
                    newStatistic,
                    new SourceAbilitySpec[] {partitionPushDownSpec});
    LogicalTableScan newScan =
            LogicalTableScan.create(scan.getCluster(), newTableSourceTable, scan.getHints());

    // transform to new node
    RexNode nonPartitionPredicate =
            RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._2()));
    if (nonPartitionPredicate.isAlwaysTrue()) {
        call.transformTo(newScan);
    } else {
        Filter newFilter = filter.copy(filter.getTraitSet(), newScan, nonPartitionPredicate);
        call.transformTo(newFilter);
    }
}
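The only Scala interop in this rule is unpacking the Tuple2<Seq<RexNode>, Seq<RexNode>> returned by RexNodeExtractor into Java lists. Below is a minimal, self-contained sketch of that conversion pattern in isolation; the class and the string "predicates" are made up for illustration, and it assumes Scala 2.x's JavaConversions on the classpath (deprecated since 2.12 in favor of JavaConverters, but it is what the Flink code above uses).

import java.util.Arrays;
import java.util.List;
import scala.Tuple2;
import scala.collection.JavaConversions;
import scala.collection.Seq;

public class SeqInteropSketch {
    public static void main(String[] args) {
        // Build two Scala Seqs from Java lists, standing in for the
        // (partition predicates, non-partition predicates) pair above.
        Seq<String> partitionPreds = JavaConversions.asScalaBuffer(Arrays.asList("p = 1"));
        Seq<String> otherPreds = JavaConversions.asScalaBuffer(Arrays.asList("a > 2", "b < 3"));
        Tuple2<Seq<String>, Seq<String>> allPredicates = new Tuple2<>(partitionPreds, otherPreds);

        // seqAsJavaList wraps each Seq in a java.util.List view, mirroring the
        // calls that feed RexUtil.composeConjunction in onMatch.
        List<String> javaPartitionPreds = JavaConversions.seqAsJavaList(allPredicates._1());
        List<String> javaOtherPreds = JavaConversions.seqAsJavaList(allPredicates._2());
        System.out.println(javaPartitionPreds + " / " + javaOtherPreds);
    }
}

Note that seqAsJavaList returns a view over the Scala collection rather than a copy, which is why the rule can pass it straight into Calcite utilities without materializing anything.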
Use of scala.collection.Seq in project kafka by apache.
The class TopicBasedRemoteLogMetadataManagerTest, method testNewPartitionUpdates.
@Test
public void testNewPartitionUpdates() throws Exception {
    // Create topics.
    String leaderTopic = "new-leader";
    HashMap<Object, Seq<Object>> assignedLeaderTopicReplicas = new HashMap<>();
    List<Object> leaderTopicReplicas = new ArrayList<>();
    // Set broker id 0 as the first entry, which is taken as the leader.
    leaderTopicReplicas.add(0);
    leaderTopicReplicas.add(1);
    leaderTopicReplicas.add(2);
    assignedLeaderTopicReplicas.put(0, JavaConverters.asScalaBuffer(leaderTopicReplicas));
    remoteLogMetadataManagerHarness.createTopicWithAssignment(
            leaderTopic,
            JavaConverters.mapAsScalaMap(assignedLeaderTopicReplicas),
            remoteLogMetadataManagerHarness.listenerName());

    String followerTopic = "new-follower";
    HashMap<Object, Seq<Object>> assignedFollowerTopicReplicas = new HashMap<>();
    List<Object> followerTopicReplicas = new ArrayList<>();
    // Set broker id 1 as the first entry, which is taken as the leader.
    followerTopicReplicas.add(1);
    followerTopicReplicas.add(2);
    followerTopicReplicas.add(0);
    assignedFollowerTopicReplicas.put(0, JavaConverters.asScalaBuffer(followerTopicReplicas));
    remoteLogMetadataManagerHarness.createTopicWithAssignment(
            followerTopic,
            JavaConverters.mapAsScalaMap(assignedFollowerTopicReplicas),
            remoteLogMetadataManagerHarness.listenerName());

    final TopicIdPartition newLeaderTopicIdPartition =
            new TopicIdPartition(Uuid.randomUuid(), new TopicPartition(leaderTopic, 0));
    final TopicIdPartition newFollowerTopicIdPartition =
            new TopicIdPartition(Uuid.randomUuid(), new TopicPartition(followerTopic, 0));

    // Adding segments for these partitions throws, as the partitions have not yet been
    // registered: the messages are published to the respective metadata topic partitions,
    // but the ConsumerManager is not yet subscribed to them.
    RemoteLogSegmentMetadata leaderSegmentMetadata =
            new RemoteLogSegmentMetadata(
                    new RemoteLogSegmentId(newLeaderTopicIdPartition, Uuid.randomUuid()),
                    0, 100, -1L, 0, time.milliseconds(), SEG_SIZE,
                    Collections.singletonMap(0, 0L));
    Assertions.assertThrows(
            Exception.class,
            () -> topicBasedRlmm().addRemoteLogSegmentMetadata(leaderSegmentMetadata).get());

    RemoteLogSegmentMetadata followerSegmentMetadata =
            new RemoteLogSegmentMetadata(
                    new RemoteLogSegmentId(newFollowerTopicIdPartition, Uuid.randomUuid()),
                    0, 100, -1L, 0, time.milliseconds(), SEG_SIZE,
                    Collections.singletonMap(0, 0L));
    Assertions.assertThrows(
            Exception.class,
            () -> topicBasedRlmm().addRemoteLogSegmentMetadata(followerSegmentMetadata).get());

    // `listRemoteLogSegments` also throws, as these topic partitions are not yet registered.
    Assertions.assertThrows(
            RemoteResourceNotFoundException.class,
            () -> topicBasedRlmm().listRemoteLogSegments(newLeaderTopicIdPartition));
    Assertions.assertThrows(
            RemoteResourceNotFoundException.class,
            () -> topicBasedRlmm().listRemoteLogSegments(newFollowerTopicIdPartition));

    topicBasedRlmm().onPartitionLeadershipChanges(
            Collections.singleton(newLeaderTopicIdPartition),
            Collections.singleton(newFollowerTopicIdPartition));

    // The RemoteLogSegmentMetadata events are already published; topicBasedRlmm's consumer
    // manager will now start fetching those events and build the cache.
    waitUntilConsumerCatchesup(newLeaderTopicIdPartition, newFollowerTopicIdPartition, 30_000L);
    Assertions.assertTrue(topicBasedRlmm().listRemoteLogSegments(newLeaderTopicIdPartition).hasNext());
    Assertions.assertTrue(topicBasedRlmm().listRemoteLogSegments(newFollowerTopicIdPartition).hasNext());
}
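The Seq usage here is confined to building the replica assignment that createTopicWithAssignment expects: a Scala Map from partition id to a Seq of broker ids, where the first id is taken as the leader. A hedged sketch of just that construction follows; the class and the replicaAssignment helper are hypothetical, isolating only the JavaConverters calls used in the test.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import scala.collection.JavaConverters;
import scala.collection.Seq;

public class ReplicaAssignmentSketch {
    // Builds a single-partition assignment: partition 0 -> the given broker ids,
    // with the first entry treated as the leader (as in the test above).
    static scala.collection.Map<Object, Seq<Object>> replicaAssignment(int... brokerIds) {
        List<Object> replicas = new ArrayList<>();
        for (int id : brokerIds) {
            replicas.add(id); // autoboxed to Integer; the Scala side expects Object
        }
        HashMap<Object, Seq<Object>> assignment = new HashMap<>();
        assignment.put(0, JavaConverters.asScalaBuffer(replicas));
        return JavaConverters.mapAsScalaMap(assignment);
    }

    public static void main(String[] args) {
        // Same shape as assignedLeaderTopicReplicas above: broker 0 is the leader.
        System.out.println(replicaAssignment(0, 1, 2));
    }
}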
Use of scala.collection.Seq in project incubator-systemml by apache.
The class MLContextTest, method testInputTupleSeqNoMetadataDML.
@SuppressWarnings({"rawtypes", "unchecked"})
@Test
public void testInputTupleSeqNoMetadataDML() {
    System.out.println("MLContextTest - Tuple sequence no metadata DML");
    List<String> list1 = new ArrayList<String>();
    list1.add("1,2");
    list1.add("3,4");
    JavaRDD<String> javaRDD1 = sc.parallelize(list1);
    RDD<String> rdd1 = JavaRDD.toRDD(javaRDD1);
    List<String> list2 = new ArrayList<String>();
    list2.add("5,6");
    list2.add("7,8");
    JavaRDD<String> javaRDD2 = sc.parallelize(list2);
    RDD<String> rdd2 = JavaRDD.toRDD(javaRDD2);
    Tuple2 tuple1 = new Tuple2("m1", rdd1);
    Tuple2 tuple2 = new Tuple2("m2", rdd2);
    List tupleList = new ArrayList();
    tupleList.add(tuple1);
    tupleList.add(tuple2);
    Seq seq = JavaConversions.asScalaBuffer(tupleList).toSeq();
    Script script = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(seq);
    setExpectedStdOut("sums: 10.0 26.0");
    ml.execute(script);
}
Use of scala.collection.Seq in project incubator-systemml by apache.
The class MLContextTest, method testInputTupleSeqWithMetadataPYDML.
@SuppressWarnings({"rawtypes", "unchecked"})
@Test
public void testInputTupleSeqWithMetadataPYDML() {
    System.out.println("MLContextTest - Tuple sequence with metadata PYDML");
    List<String> list1 = new ArrayList<String>();
    list1.add("1,2");
    list1.add("3,4");
    JavaRDD<String> javaRDD1 = sc.parallelize(list1);
    RDD<String> rdd1 = JavaRDD.toRDD(javaRDD1);
    List<String> list2 = new ArrayList<String>();
    list2.add("5,6");
    list2.add("7,8");
    JavaRDD<String> javaRDD2 = sc.parallelize(list2);
    RDD<String> rdd2 = JavaRDD.toRDD(javaRDD2);
    MatrixMetadata mm1 = new MatrixMetadata(2, 2);
    MatrixMetadata mm2 = new MatrixMetadata(2, 2);
    Tuple3 tuple1 = new Tuple3("m1", rdd1, mm1);
    Tuple3 tuple2 = new Tuple3("m2", rdd2, mm2);
    List tupleList = new ArrayList();
    tupleList.add(tuple1);
    tupleList.add(tuple2);
    Seq seq = JavaConversions.asScalaBuffer(tupleList).toSeq();
    Script script = pydml("print('sums: ' + sum(m1) + ' ' + sum(m2))").in(seq);
    setExpectedStdOut("sums: 10.0 26.0");
    ml.execute(script);
}
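Both tuple tests above (the Tuple2 variant without metadata and the Tuple3 variant with MatrixMetadata) lean on the same JavaConversions.asScalaBuffer(...).toSeq() idiom, with raw types suppressed. As a sketch only, here is the same construction with explicit generics, which would make the rawtypes/unchecked suppressions unnecessary; the class name is made up, and plain strings stand in for the RDD values so the example needs no Spark context.

import java.util.ArrayList;
import java.util.List;
import scala.Tuple2;
import scala.collection.JavaConversions;
import scala.collection.Seq;

public class TypedTupleSeqSketch {
    public static void main(String[] args) {
        // Fully typed equivalent of the tupleList built in the tests:
        // each element pairs a variable name with its value.
        List<Tuple2<String, String>> tupleList = new ArrayList<>();
        tupleList.add(new Tuple2<>("m1", "rdd1-placeholder"));
        tupleList.add(new Tuple2<>("m2", "rdd2-placeholder"));

        // toSeq() exposes the Buffer through the scala.collection.Seq
        // interface, which is what Script.in(...) accepts in the tests.
        Seq<Tuple2<String, String>> seq = JavaConversions.asScalaBuffer(tupleList).toSeq();
        System.out.println(seq);
    }
}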
Use of scala.collection.Seq in project incubator-systemml by apache.
The class MLContextTest, method testOutputScalaSeqDML.
@SuppressWarnings({"unchecked", "rawtypes"})
@Test
public void testOutputScalaSeqDML() {
    System.out.println("MLContextTest - output specified as Scala Seq DML");
    List outputs = Arrays.asList("x", "y");
    Seq seq = JavaConversions.asScalaBuffer(outputs).toSeq();
    Script script = dml("a=1;x=a+1;y=x+1").out(seq);
    MLResults results = ml.execute(script);
    Assert.assertEquals(2, results.getLong("x"));
    Assert.assertEquals(3, results.getLong("y"));
}
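The output case is the same conversion applied to plain strings. A typed version (illustrative only, class name made up) of the Seq handed to Script.out(...):

import java.util.Arrays;
import scala.collection.JavaConversions;
import scala.collection.Seq;

public class OutputSeqSketch {
    public static void main(String[] args) {
        // Typed equivalent of the raw outputs list in the test above.
        Seq<String> outputs = JavaConversions.asScalaBuffer(Arrays.asList("x", "y")).toSeq();
        System.out.println(outputs);
    }
}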