Example 6 with PlannerBase

Use of org.apache.flink.table.planner.delegation.PlannerBase in project flink by apache.

From the class HiveTableSourceITCase, method testParallelismWithoutParallelismInfer. The test verifies that, with Hive source parallelism inference disabled, the source transformation falls back to the default value of table.exec.resource.default-parallelism.

@Test
public void testParallelismWithoutParallelismInfer() throws Exception {
    final String dbName = "source_db";
    final String tblName = "test_parallelism_no_infer";
    TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
    tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
    tEnv.registerCatalog("hive", hiveCatalog);
    tEnv.useCatalog("hive");
    tEnv.getConfig().getConfiguration().setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, false);
    tEnv.executeSql("CREATE TABLE source_db.test_parallelism_no_infer " + "(`year` STRING, `value` INT) partitioned by (pt int)");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
            .addRow(new Object[] { "2014", 3 })
            .addRow(new Object[] { "2014", 4 })
            .commit("pt=0");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
            .addRow(new Object[] { "2015", 2 })
            .addRow(new Object[] { "2015", 5 })
            .commit("pt=1");
    Table table = tEnv.sqlQuery("select * from hive.source_db.test_parallelism_no_infer limit 1");
    PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner();
    RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
    ExecNode<?> execNode = planner.translateToExecNodeGraph(toScala(Collections.singletonList(relNode))).getRootNodes().get(0);
    // walk two input levels down from the translated root to reach the source transformation
    Transformation<?> transformation = (execNode.translateToPlan(planner).getInputs().get(0)).getInputs().get(0);
    // with inference disabled, the source should fall back to the default parallelism
    Assert.assertEquals(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM.defaultValue().intValue(), transformation.getParallelism());
}
Also used : CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.flink.table.api.Table) RelNode(org.apache.calcite.rel.RelNode) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) TableEnvironment(org.apache.flink.table.api.TableEnvironment) PlannerBase(org.apache.flink.table.planner.delegation.PlannerBase) Test(org.junit.Test)
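For contrast, here is a minimal sketch (not part of the flink sources above) of the opposite setup: leaving source parallelism inference enabled and capping it with the companion option. Both options are real HiveOptions; the cap value is illustrative, and the resulting parallelism depends on how many splits Hive reports.

// Sketch only, assuming the same tEnv as in the test above.
tEnv.getConfig().getConfiguration()
        .setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, true);
// cap the inferred parallelism (illustrative value)
tEnv.getConfig().getConfiguration()
        .setInteger(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM_MAX, 4);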

Example 7 with PlannerBase

Use of org.apache.flink.table.planner.delegation.PlannerBase in project flink by apache.

From the class HiveTableSourceITCase, method testParallelismSettingTranslateAndAssert. This private helper optimizes a Table, translates it to a Transformation, and asserts the expected parallelism on the result.

private void testParallelismSettingTranslateAndAssert(int expected, Table table, TableEnvironment tEnv) {
    PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner();
    RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
    ExecNode<?> execNode = planner.translateToExecNodeGraph(toScala(Collections.singletonList(relNode))).getRootNodes().get(0);
    Transformation<?> transformation = execNode.translateToPlan(planner);
    Assert.assertEquals(expected, transformation.getParallelism());
}
Also used : RelNode(org.apache.calcite.rel.RelNode) PlannerBase(org.apache.flink.table.planner.delegation.PlannerBase)
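A hedged usage sketch of this helper: the query and the expected parallelism of 2 are hypothetical; only the helper's signature comes from the listing above.

// Hypothetical caller inside the same test class:
Table table = tEnv.sqlQuery("select * from hive.source_db.some_table");
testParallelismSettingTranslateAndAssert(2, table, tEnv);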

Example 8 with PlannerBase

Use of org.apache.flink.table.planner.delegation.PlannerBase in project flink by apache.

From the class HiveTableSourceITCase, method testParallelismOnLimitPushDown. The test verifies that, with inference disabled and a LIMIT pushed down to the source, the source still uses the configured default parallelism.

@Test
public void testParallelismOnLimitPushDown() throws Exception {
    final String dbName = "source_db";
    final String tblName = "test_parallelism_limit_pushdown";
    TableEnvironment tEnv = createTableEnv();
    tEnv.getConfig().getConfiguration().setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, false);
    tEnv.getConfig().getConfiguration().setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 2);
    tEnv.executeSql("CREATE TABLE source_db.test_parallelism_limit_pushdown " + "(`year` STRING, `value` INT) partitioned by (pt int)");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
            .addRow(new Object[] { "2014", 3 })
            .addRow(new Object[] { "2014", 4 })
            .commit("pt=0");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
            .addRow(new Object[] { "2015", 2 })
            .addRow(new Object[] { "2015", 5 })
            .commit("pt=1");
    Table table = tEnv.sqlQuery("select * from hive.source_db.test_parallelism_limit_pushdown limit 1");
    PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner();
    RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
    ExecNode<?> execNode = planner.translateToExecNodeGraph(toScala(Collections.singletonList(relNode))).getRootNodes().get(0);
    Transformation<?> transformation = (execNode.translateToPlan(planner).getInputs().get(0)).getInputs().get(0);
    // with inference disabled, the source should use the configured default parallelism (2)
    Assert.assertEquals(2, transformation.getParallelism());
}
Also used : CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.flink.table.api.Table) RelNode(org.apache.calcite.rel.RelNode) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) TableEnvironment(org.apache.flink.table.api.TableEnvironment) PlannerBase(org.apache.flink.table.planner.delegation.PlannerBase) Test(org.junit.Test)
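The literal 2 in the assertion mirrors the value set earlier in the test. As a sketch, the same assertion can be written against the configured option itself, which makes that dependency explicit (assuming the tEnv and transformation from the test above):

// Sketch: read the configured default back instead of hard-coding 2.
int configuredDefault = tEnv.getConfig().getConfiguration()
        .getInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM);
Assert.assertEquals(configuredDefault, transformation.getParallelism());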

Example 9 with PlannerBase

Use of org.apache.flink.table.planner.delegation.PlannerBase in project flink by apache.

From the class StreamExecSink, method translateToPlanInternal. The method collects the rowtime attribute columns of the input row type, rejects ambiguous cases for non-collect sinks, and builds the sink transformation.

@SuppressWarnings("unchecked")
@Override
protected Transformation<Object> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final DynamicTableSink tableSink = tableSinkSpec.getTableSink(planner.getFlinkContext());
    final boolean isCollectSink = tableSink instanceof CollectDynamicSink;
    final List<Integer> rowtimeFieldIndices = new ArrayList<>();
    for (int i = 0; i < inputRowType.getFieldCount(); ++i) {
        if (TypeCheckUtils.isRowTime(inputRowType.getTypeAt(i))) {
            rowtimeFieldIndices.add(i);
        }
    }
    final int rowtimeFieldIndex;
    if (rowtimeFieldIndices.size() > 1 && !isCollectSink) {
        throw new TableException(
                String.format(
                        "The query contains more than one rowtime attribute column [%s] for writing into table '%s'.\n"
                                + "Please select the column that should be used as the event-time timestamp "
                                + "for the table sink by casting all other columns to regular TIMESTAMP or TIMESTAMP_LTZ.",
                        rowtimeFieldIndices.stream()
                                .map(i -> inputRowType.getFieldNames().get(i))
                                .collect(Collectors.joining(", ")),
                        tableSinkSpec.getContextResolvedTable().getIdentifier().asSummaryString()));
    } else if (rowtimeFieldIndices.size() == 1) {
        rowtimeFieldIndex = rowtimeFieldIndices.get(0);
    } else {
        rowtimeFieldIndex = -1;
    }
    return createSinkTransformation(planner.getExecEnv(), config, inputTransform, tableSink, rowtimeFieldIndex, upsertMaterialize);
}
Also used : InputProperty(org.apache.flink.table.planner.plan.nodes.exec.InputProperty) JsonCreator(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonCreator) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) CollectDynamicSink(org.apache.flink.table.planner.connectors.CollectDynamicSink) RowType(org.apache.flink.table.types.logical.RowType) JsonInclude(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude) ExecNode(org.apache.flink.table.planner.plan.nodes.exec.ExecNode) ArrayList(java.util.ArrayList) DynamicTableSinkSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.DynamicTableSinkSpec) FlinkVersion(org.apache.flink.FlinkVersion) TypeCheckUtils(org.apache.flink.table.runtime.typeutils.TypeCheckUtils) ExecNodeContext(org.apache.flink.table.planner.plan.nodes.exec.ExecNodeContext) RowData(org.apache.flink.table.data.RowData) DynamicTableSink(org.apache.flink.table.connector.sink.DynamicTableSink) PlannerBase(org.apache.flink.table.planner.delegation.PlannerBase) ExecNodeMetadata(org.apache.flink.table.planner.plan.nodes.exec.ExecNodeMetadata) ExecNodeConfig(org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig) TableException(org.apache.flink.table.api.TableException) CommonExecSink(org.apache.flink.table.planner.plan.nodes.exec.common.CommonExecSink) Collectors(java.util.stream.Collectors) JsonProperty(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty) List(java.util.List) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) LogicalType(org.apache.flink.table.types.logical.LogicalType) Transformation(org.apache.flink.api.dag.Transformation) Collections(java.util.Collections)
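A short sketch of SQL that would reach the multi-rowtime branch above, together with the fix the error message suggests. Table and column names are hypothetical; only the TIMESTAMP cast advice comes from the exception text.

// Hypothetical: 'src' exposes two rowtime attributes rt1 and rt2, which a
// non-collect sink rejects with the TableException above.
// tEnv.executeSql("INSERT INTO sink SELECT rt1, rt2, v FROM src");
// Fix per the message: keep one rowtime attribute and cast the rest away.
tEnv.executeSql("INSERT INTO sink SELECT rt1, CAST(rt2 AS TIMESTAMP(3)), v FROM src");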

Example 10 with PlannerBase

Use of org.apache.flink.table.planner.delegation.PlannerBase in project flink by apache.

From the class CatalogStatisticsTest, method testGetPartitionStatsWithSomeUnknownColumnStats. The test verifies that when some partition column statistics are unknown, the metadata query returns null for the affected column metadata while the row count remains available.

@Test
public void testGetPartitionStatsWithSomeUnknownColumnStats() throws Exception {
    TestPartitionableSourceFactory.createTemporaryTable(tEnv, "PartT", true);
    // row-count and column stats for both partitions; the extra boolean flag on the first
    // presumably marks some of its column stats as unknown (helpers not shown in this listing)
    createPartitionStats("A", 1);
    createPartitionColumnStats("A", 1, true);
    createPartitionStats("A", 2);
    createPartitionColumnStats("A", 2);
    RelNode t1 =
            ((PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner())
                    .optimize(
                            TableTestUtil.toRelNode(
                                    tEnv.sqlQuery("select id, name from PartT where part1 = 'A'")));
    FlinkRelMetadataQuery mq = FlinkRelMetadataQuery.reuseOrCreate(t1.getCluster().getMetadataQuery());
    assertEquals(200.0, mq.getRowCount(t1), 0.0);
    // long type
    assertNull(mq.getDistinctRowCount(t1, ImmutableBitSet.of(0), null));
    assertNull(mq.getColumnNullCount(t1, 0));
    assertNull(mq.getColumnInterval(t1, 0));
    // string type
    assertNull(mq.getDistinctRowCount(t1, ImmutableBitSet.of(1), null));
    assertNull(mq.getColumnNullCount(t1, 1));
}
Also used : RelNode(org.apache.calcite.rel.RelNode) PlannerBase(org.apache.flink.table.planner.delegation.PlannerBase) FlinkRelMetadataQuery(org.apache.flink.table.planner.plan.metadata.FlinkRelMetadataQuery) Test(org.junit.Test)
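The createPartitionStats/createPartitionColumnStats helpers are not shown in this listing. As a rough sketch, partition column statistics for a catalog-backed partitioned table can be registered through the Catalog API; everything below (names, partition keys, statistic values) is illustrative, and the test's temporary PartT table likely populates its stats through a different test-only path.

// Sketch only. Classes come from org.apache.flink.table.catalog and
// org.apache.flink.table.catalog.stats; maps from java.util.
Catalog catalog = tEnv.getCatalog(tEnv.getCurrentCatalog()).get();
Map<String, String> partitionKeys = new HashMap<>();
partitionKeys.put("part1", "A");
partitionKeys.put("part2", "1");
Map<String, CatalogColumnStatisticsData> columnStats = new HashMap<>();
// min, max, ndv, nullCount for a BIGINT 'id' column (illustrative values)
columnStats.put("id", new CatalogColumnStatisticsDataLong(1L, 100L, 100L, 0L));
catalog.alterPartitionColumnStatistics(
        new ObjectPath(tEnv.getCurrentDatabase(), "PartT"),
        new CatalogPartitionSpec(partitionKeys),
        new CatalogColumnStatistics(columnStats),
        false);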

Aggregations

PlannerBase (org.apache.flink.table.planner.delegation.PlannerBase): 12 usages
RelNode (org.apache.calcite.rel.RelNode): 7 usages
Test (org.junit.Test): 5 usages
List (java.util.List): 4 usages
Transformation (org.apache.flink.api.dag.Transformation): 4 usages
RowData (org.apache.flink.table.data.RowData): 4 usages
ExecNode (org.apache.flink.table.planner.plan.nodes.exec.ExecNode): 4 usages
ExecNodeConfig (org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig): 4 usages
ExecNodeContext (org.apache.flink.table.planner.plan.nodes.exec.ExecNodeContext): 4 usages
Collections (java.util.Collections): 3 usages
Collectors (java.util.stream.Collectors): 3 usages
FlinkRelMetadataQuery (org.apache.flink.table.planner.plan.metadata.FlinkRelMetadataQuery): 3 usages
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 3 usages
InputProperty (org.apache.flink.table.planner.plan.nodes.exec.InputProperty): 3 usages
LogicalType (org.apache.flink.table.types.logical.LogicalType): 3 usages
RowType (org.apache.flink.table.types.logical.RowType): 3 usages
ArrayList (java.util.ArrayList): 2 usages
Arrays (java.util.Arrays): 2 usages
FlinkVersion (org.apache.flink.FlinkVersion): 2 usages
JsonCreator (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonCreator): 2 usages