Example 1 with TableFunction

Use of org.apache.flink.table.functions.TableFunction in project flink by apache.

From class HiveLookupTableSource, method getLookupFunction:

private TableFunction<RowData> getLookupFunction(int[] keys) {
    final String defaultPartitionName = JobConfUtils.getDefaultPartitionName(jobConf);
    PartitionFetcher.Context<HiveTablePartition> fetcherContext =
            new HiveTablePartitionFetcherContext(
                    tablePath,
                    hiveShim,
                    new JobConfWrapper(jobConf),
                    catalogTable.getPartitionKeys(),
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    configuration,
                    defaultPartitionName);
    final PartitionFetcher<HiveTablePartition> partitionFetcher;
    // copy the table path to a local variable so the lambda does not capture 'this'
    final ObjectPath tableFullPath = tablePath;
    if (catalogTable.getPartitionKeys().isEmpty()) {
        // non-partitioned table, the fetcher fetches the partition which represents the given
        // table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            partValueList.add(
                    context.getPartition(new ArrayList<>())
                            .orElseThrow(
                                    () -> new IllegalArgumentException(
                                            String.format(
                                                    "Fetch partition fail for hive table %s.",
                                                    tableFullPath))));
            return partValueList;
        };
    } else if (isStreamingSource()) {
        // streaming-read partitioned table, the fetcher fetches the latest partition of the
        // given table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues =
                    context.getComparablePartitionValueList();
            // fetch latest partitions for partitioned table
            if (!comparablePartitionValues.isEmpty()) {
                // sort in desc order
                comparablePartitionValues.sort(
                        (o1, o2) -> o2.getComparator().compareTo(o1.getComparator()));
                PartitionFetcher.Context.ComparablePartitionValue maxPartition =
                        comparablePartitionValues.get(0);
                partValueList.add(
                        context.getPartition((List<String>) maxPartition.getPartitionValue())
                                .orElseThrow(
                                        () -> new IllegalArgumentException(
                                                String.format(
                                                        "Fetch partition fail for hive table %s.",
                                                        tableFullPath))));
            } else {
                throw new IllegalArgumentException(
                        String.format(
                                "At least one partition is required when set '%s' to 'latest' in temporal join,"
                                        + " but actual partition number is '%s' for hive table %s",
                                STREAMING_SOURCE_PARTITION_INCLUDE.key(),
                                comparablePartitionValues.size(),
                                tableFullPath));
            }
            return partValueList;
        };
    } else {
        // bounded-read partitioned table, the fetcher fetches all partitions of the given
        // filesystem table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues =
                    context.getComparablePartitionValueList();
            for (PartitionFetcher.Context.ComparablePartitionValue comparablePartitionValue :
                    comparablePartitionValues) {
                partValueList.add(
                        context.getPartition(
                                        (List<String>) comparablePartitionValue.getPartitionValue())
                                .orElseThrow(
                                        () -> new IllegalArgumentException(
                                                String.format(
                                                        "Fetch partition fail for hive table %s.",
                                                        tableFullPath))));
            }
            return partValueList;
        };
    }
    PartitionReader<HiveTablePartition, RowData> partitionReader =
            new HiveInputFormatPartitionReader(
                    flinkConf,
                    jobConf,
                    hiveVersion,
                    tablePath,
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    catalogTable.getPartitionKeys(),
                    projectedFields,
                    flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
    return new FileSystemLookupFunction<>(
            partitionFetcher,
            fetcherContext,
            partitionReader,
            (RowType) getProducedTableSchema().toRowDataType().getLogicalType(),
            keys,
            hiveTableReloadInterval);
}
Also used : HivePartitionUtils(org.apache.flink.connectors.hive.util.HivePartitionUtils) TableFunction(org.apache.flink.table.functions.TableFunction) PartitionReader(org.apache.flink.connector.file.table.PartitionReader) DataType(org.apache.flink.table.types.DataType) CatalogTable(org.apache.flink.table.catalog.CatalogTable) LoggerFactory(org.slf4j.LoggerFactory) STREAMING_SOURCE_PARTITION_INCLUDE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_INCLUDE) HiveInputFormatPartitionReader(org.apache.flink.connectors.hive.read.HiveInputFormatPartitionReader) JobConfUtils(org.apache.flink.connectors.hive.util.JobConfUtils) RowType(org.apache.flink.table.types.logical.RowType) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Partition(org.apache.hadoop.hive.metastore.api.Partition) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) ArrayList(java.util.ArrayList) LookupTableSource(org.apache.flink.table.connector.source.LookupTableSource) ReadableConfig(org.apache.flink.configuration.ReadableConfig) Duration(java.time.Duration) HivePartitionFetcherContextBase(org.apache.flink.connectors.hive.read.HivePartitionFetcherContextBase) RowData(org.apache.flink.table.data.RowData) Logger(org.slf4j.Logger) STREAMING_SOURCE_CONSUME_START_OFFSET(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_CONSUME_START_OFFSET) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) Configuration(org.apache.flink.configuration.Configuration) Preconditions(org.apache.flink.util.Preconditions) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) JobConf(org.apache.hadoop.mapred.JobConf) LOOKUP_JOIN_CACHE_TTL(org.apache.flink.connectors.hive.HiveOptions.LOOKUP_JOIN_CACHE_TTL) List(java.util.List) Optional(java.util.Optional) TableFunctionProvider(org.apache.flink.table.connector.source.TableFunctionProvider) STREAMING_SOURCE_MONITOR_INTERVAL(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_MONITOR_INTERVAL) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
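
The FileSystemLookupFunction returned above is itself a TableFunction<RowData>. For orientation, here is a minimal sketch of that contract, not Flink's implementation: a synchronous lookup function only needs a public eval method that forwards matching rows via collect(...). The class InMemoryLookupFunction and its backing map are hypothetical.

import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.flink.table.data.RowData;
import org.apache.flink.table.functions.TableFunction;

// Hypothetical lookup function backed by an in-memory map; a real function
// must also be serializable so the runtime can ship it to task managers.
public class InMemoryLookupFunction extends TableFunction<RowData> {

    private final Map<String, List<RowData>> rowsByKey;

    public InMemoryLookupFunction(Map<String, List<RowData>> rowsByKey) {
        this.rowsByKey = rowsByKey;
    }

    // The runtime calls eval(...) once per probe-side row with the lookup key
    // values; every matching row is emitted through collect(...).
    public void eval(Object key) {
        for (RowData row :
                rowsByKey.getOrDefault(String.valueOf(key), Collections.emptyList())) {
            collect(row);
        }
    }
}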

Example 2 with TableFunction

Use of org.apache.flink.table.functions.TableFunction in project flink by apache.

From class HBaseDynamicTableFactoryTest (hbase1 connector), method testTableSourceFactory:

@SuppressWarnings("rawtypes")
@Test
public void testTableSourceFactory() {
    ResolvedSchema schema =
            ResolvedSchema.of(
                    Column.physical(FAMILY1, ROW(FIELD(COL1, INT()))),
                    Column.physical(FAMILY2, ROW(FIELD(COL1, INT()), FIELD(COL2, BIGINT()))),
                    Column.physical(ROWKEY, BIGINT()),
                    Column.physical(
                            FAMILY3,
                            ROW(FIELD(COL1, DOUBLE()), FIELD(COL2, BOOLEAN()), FIELD(COL3, STRING()))),
                    Column.physical(
                            FAMILY4,
                            ROW(
                                    FIELD(COL1, DECIMAL(10, 3)),
                                    FIELD(COL2, TIMESTAMP(3)),
                                    FIELD(COL3, DATE()),
                                    FIELD(COL4, TIME()))));
    DynamicTableSource source = createTableSource(schema, getAllOptions());
    assertTrue(source instanceof HBaseDynamicTableSource);
    HBaseDynamicTableSource hbaseSource = (HBaseDynamicTableSource) source;
    int[][] lookupKey = { { 2 } };
    LookupTableSource.LookupRuntimeProvider lookupProvider =
            hbaseSource.getLookupRuntimeProvider(new LookupRuntimeProviderContext(lookupKey));
    assertTrue(lookupProvider instanceof TableFunctionProvider);
    TableFunction tableFunction = ((TableFunctionProvider) lookupProvider).createTableFunction();
    assertTrue(tableFunction instanceof HBaseRowDataLookupFunction);
    assertEquals("testHBastTable", ((HBaseRowDataLookupFunction) tableFunction).getHTableName());
    HBaseTableSchema hbaseSchema = hbaseSource.getHBaseTableSchema();
    assertEquals(2, hbaseSchema.getRowKeyIndex());
    assertEquals(Optional.of(Types.LONG), hbaseSchema.getRowKeyTypeInfo());
    assertArrayEquals(new String[] { "f1", "f2", "f3", "f4" }, hbaseSchema.getFamilyNames());
    assertArrayEquals(new String[] { "c1" }, hbaseSchema.getQualifierNames("f1"));
    assertArrayEquals(new String[] { "c1", "c2" }, hbaseSchema.getQualifierNames("f2"));
    assertArrayEquals(new String[] { "c1", "c2", "c3" }, hbaseSchema.getQualifierNames("f3"));
    assertArrayEquals(new String[] { "c1", "c2", "c3", "c4" }, hbaseSchema.getQualifierNames("f4"));
    assertArrayEquals(new DataType[] { INT() }, hbaseSchema.getQualifierDataTypes("f1"));
    assertArrayEquals(new DataType[] { INT(), BIGINT() }, hbaseSchema.getQualifierDataTypes("f2"));
    assertArrayEquals(new DataType[] { DOUBLE(), BOOLEAN(), STRING() }, hbaseSchema.getQualifierDataTypes("f3"));
    assertArrayEquals(new DataType[] { DECIMAL(10, 3), TIMESTAMP(3), DATE(), TIME() }, hbaseSchema.getQualifierDataTypes("f4"));
}
Also used : HBaseDynamicTableSource(org.apache.flink.connector.hbase1.source.HBaseDynamicTableSource) HBaseRowDataLookupFunction(org.apache.flink.connector.hbase.source.HBaseRowDataLookupFunction) LookupRuntimeProviderContext(org.apache.flink.table.runtime.connector.source.LookupRuntimeProviderContext) LookupTableSource(org.apache.flink.table.connector.source.LookupTableSource) HBaseTableSchema(org.apache.flink.connector.hbase.util.HBaseTableSchema) TableFunction(org.apache.flink.table.functions.TableFunction) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) TableFunctionProvider(org.apache.flink.table.connector.source.TableFunctionProvider) Test(org.junit.Test)
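
On the provider side, the wiring that this test exercises on HBaseDynamicTableSource can be sketched as below. This is a hypothetical minimal skeleton, not the HBase connector's code; MyLookupTableSource and EmptyLookupFunction are invented names.

import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.LookupTableSource;
import org.apache.flink.table.connector.source.TableFunctionProvider;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.functions.TableFunction;

// Hypothetical table source showing how a TableFunctionProvider reaches the planner.
public class MyLookupTableSource implements LookupTableSource {

    @Override
    public LookupRuntimeProvider getLookupRuntimeProvider(LookupContext context) {
        // context.getKeys() carries the lookup key indices (e.g. {{2}} in the
        // test above); a real connector uses them to pick the key columns.
        return TableFunctionProvider.of(new EmptyLookupFunction());
    }

    @Override
    public DynamicTableSource copy() {
        return new MyLookupTableSource();
    }

    @Override
    public String asSummaryString() {
        return "MyLookupTableSource";
    }

    /** Stand-in lookup function; a real connector queries its external store. */
    public static class EmptyLookupFunction extends TableFunction<RowData> {
        public void eval(Object... keys) {
            // Emit nothing; a real implementation calls collect(row) per match.
        }
    }
}

The test then unwraps the provider with createTableFunction() and asserts on the concrete function class, exactly as shown above.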

Example 3 with TableFunction

Use of org.apache.flink.table.functions.TableFunction in project flink by apache.

From class HBaseDynamicTableFactoryTest (hbase2 connector), method testTableSourceFactory:

@SuppressWarnings("rawtypes")
@Test
public void testTableSourceFactory() {
    ResolvedSchema schema =
            ResolvedSchema.of(
                    Column.physical(FAMILY1, ROW(FIELD(COL1, INT()))),
                    Column.physical(FAMILY2, ROW(FIELD(COL1, INT()), FIELD(COL2, BIGINT()))),
                    Column.physical(ROWKEY, BIGINT()),
                    Column.physical(
                            FAMILY3,
                            ROW(FIELD(COL1, DOUBLE()), FIELD(COL2, BOOLEAN()), FIELD(COL3, STRING()))),
                    Column.physical(
                            FAMILY4,
                            ROW(
                                    FIELD(COL1, DECIMAL(10, 3)),
                                    FIELD(COL2, TIMESTAMP(3)),
                                    FIELD(COL3, DATE()),
                                    FIELD(COL4, TIME()))));
    DynamicTableSource source = createTableSource(schema, getAllOptions());
    assertTrue(source instanceof HBaseDynamicTableSource);
    HBaseDynamicTableSource hbaseSource = (HBaseDynamicTableSource) source;
    int[][] lookupKey = { { 2 } };
    LookupTableSource.LookupRuntimeProvider lookupProvider =
            hbaseSource.getLookupRuntimeProvider(new LookupRuntimeProviderContext(lookupKey));
    assertTrue(lookupProvider instanceof TableFunctionProvider);
    TableFunction tableFunction = ((TableFunctionProvider) lookupProvider).createTableFunction();
    assertTrue(tableFunction instanceof HBaseRowDataLookupFunction);
    assertEquals("testHBastTable", ((HBaseRowDataLookupFunction) tableFunction).getHTableName());
    HBaseTableSchema hbaseSchema = hbaseSource.getHBaseTableSchema();
    assertEquals(2, hbaseSchema.getRowKeyIndex());
    assertEquals(Optional.of(Types.LONG), hbaseSchema.getRowKeyTypeInfo());
    assertArrayEquals(new String[] { "f1", "f2", "f3", "f4" }, hbaseSchema.getFamilyNames());
    assertArrayEquals(new String[] { "c1" }, hbaseSchema.getQualifierNames("f1"));
    assertArrayEquals(new String[] { "c1", "c2" }, hbaseSchema.getQualifierNames("f2"));
    assertArrayEquals(new String[] { "c1", "c2", "c3" }, hbaseSchema.getQualifierNames("f3"));
    assertArrayEquals(new String[] { "c1", "c2", "c3", "c4" }, hbaseSchema.getQualifierNames("f4"));
    assertArrayEquals(new DataType[] { INT() }, hbaseSchema.getQualifierDataTypes("f1"));
    assertArrayEquals(new DataType[] { INT(), BIGINT() }, hbaseSchema.getQualifierDataTypes("f2"));
    assertArrayEquals(new DataType[] { DOUBLE(), BOOLEAN(), STRING() }, hbaseSchema.getQualifierDataTypes("f3"));
    assertArrayEquals(new DataType[] { DECIMAL(10, 3), TIMESTAMP(3), DATE(), TIME() }, hbaseSchema.getQualifierDataTypes("f4"));
}
Also used : HBaseDynamicTableSource(org.apache.flink.connector.hbase2.source.HBaseDynamicTableSource) HBaseRowDataLookupFunction(org.apache.flink.connector.hbase.source.HBaseRowDataLookupFunction) LookupRuntimeProviderContext(org.apache.flink.table.runtime.connector.source.LookupRuntimeProviderContext) LookupTableSource(org.apache.flink.table.connector.source.LookupTableSource) HBaseTableSchema(org.apache.flink.connector.hbase.util.HBaseTableSchema) TableFunction(org.apache.flink.table.functions.TableFunction) AsyncTableFunction(org.apache.flink.table.functions.AsyncTableFunction) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) AsyncTableFunctionProvider(org.apache.flink.table.connector.source.AsyncTableFunctionProvider) TableFunctionProvider(org.apache.flink.table.connector.source.TableFunctionProvider) Test(org.junit.Test)
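
Unlike the hbase1 example, this import list also includes AsyncTableFunction and AsyncTableFunctionProvider, the asynchronous counterparts. As a hedged sketch of the async contract (NoopAsyncLookupFunction is a made-up name; the eval signature follows the documented AsyncTableFunction convention), rows are delivered by completing a future rather than by calling collect(...):

import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.CompletableFuture;

import org.apache.flink.table.data.RowData;
import org.apache.flink.table.functions.AsyncTableFunction;

// Hypothetical async lookup function: matching rows are handed back by
// completing the future instead of being emitted synchronously.
public class NoopAsyncLookupFunction extends AsyncTableFunction<RowData> {

    public void eval(CompletableFuture<Collection<RowData>> resultFuture, Object key) {
        // A real implementation issues a non-blocking client call and completes
        // the future from its callback; here we complete with no matches.
        resultFuture.complete(Collections.emptyList());
    }
}

Such a function would be exposed with AsyncTableFunctionProvider.of(new NoopAsyncLookupFunction()) instead of TableFunctionProvider.of(...).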

Example 4 with TableFunction

Use of org.apache.flink.table.functions.TableFunction in project flink by apache.

From class CommonExecLookupJoin, method translateToPlanInternal:

@Override
@SuppressWarnings("unchecked")
public Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    RelOptTable temporalTable = temporalTableSourceSpec.getTemporalTable(planner.getFlinkContext());
    // validate whether the node is valid and supported.
    validate(temporalTable);
    final ExecEdge inputEdge = getInputEdges().get(0);
    RowType inputRowType = (RowType) inputEdge.getOutputType();
    RowType tableSourceRowType = FlinkTypeFactory.toLogicalRowType(temporalTable.getRowType());
    RowType resultRowType = (RowType) getOutputType();
    validateLookupKeyType(lookupKeys, inputRowType, tableSourceRowType);
    UserDefinedFunction userDefinedFunction =
            LookupJoinUtil.getLookupFunction(temporalTable, lookupKeys.keySet());
    UserDefinedFunctionHelper.prepareInstance(config, userDefinedFunction);
    // an AsyncTableFunction selects the async lookup-join code path below
    final boolean isAsyncEnabled = userDefinedFunction instanceof AsyncTableFunction;
    boolean isLeftOuterJoin = joinType == FlinkJoinType.LEFT;
    StreamOperatorFactory<RowData> operatorFactory;
    if (isAsyncEnabled) {
        operatorFactory =
                createAsyncLookupJoin(
                        temporalTable,
                        config,
                        lookupKeys,
                        (AsyncTableFunction<Object>) userDefinedFunction,
                        planner.getRelBuilder(),
                        inputRowType,
                        tableSourceRowType,
                        resultRowType,
                        isLeftOuterJoin);
    } else {
        operatorFactory =
                createSyncLookupJoin(
                        temporalTable,
                        config,
                        lookupKeys,
                        (TableFunction<Object>) userDefinedFunction,
                        planner.getRelBuilder(),
                        inputRowType,
                        tableSourceRowType,
                        resultRowType,
                        isLeftOuterJoin,
                        planner.getExecEnv().getConfig().isObjectReuseEnabled());
    }
    Transformation<RowData> inputTransformation =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    return ExecNodeUtil.createOneInputTransformation(
            inputTransformation,
            createTransformationMeta(LOOKUP_JOIN_TRANSFORMATION, config),
            operatorFactory,
            InternalTypeInfo.of(resultRowType),
            inputTransformation.getParallelism());
}
Also used : RowData(org.apache.flink.table.data.RowData) Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) UserDefinedFunction(org.apache.flink.table.functions.UserDefinedFunction) RowType(org.apache.flink.table.types.logical.RowType) AsyncTableFunction(org.apache.flink.table.functions.AsyncTableFunction) TableFunction(org.apache.flink.table.functions.TableFunction) RelOptTable(org.apache.calcite.plan.RelOptTable)
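
Stripped of planner details, the sync/async decision above reduces to a runtime type check on the function obtained from the lookup provider. A minimal standalone sketch of that dispatch (class and method names are hypothetical):

import org.apache.flink.table.functions.AsyncTableFunction;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.table.functions.UserDefinedFunction;

public final class LookupModeSketch {

    // Mirrors the branch in translateToPlanInternal: an AsyncTableFunction
    // selects the async operator, a TableFunction the synchronous one.
    static String describeLookupMode(UserDefinedFunction function) {
        if (function instanceof AsyncTableFunction) {
            return "async lookup join"; // -> createAsyncLookupJoin(...)
        } else if (function instanceof TableFunction) {
            return "sync lookup join"; // -> createSyncLookupJoin(...)
        }
        throw new IllegalArgumentException(
                "Expected a TableFunction or AsyncTableFunction, got " + function);
    }

    private LookupModeSketch() {}
}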

Aggregations

TableFunction (org.apache.flink.table.functions.TableFunction): 4
LookupTableSource (org.apache.flink.table.connector.source.LookupTableSource): 3
TableFunctionProvider (org.apache.flink.table.connector.source.TableFunctionProvider): 3
HBaseRowDataLookupFunction (org.apache.flink.connector.hbase.source.HBaseRowDataLookupFunction): 2
HBaseTableSchema (org.apache.flink.connector.hbase.util.HBaseTableSchema): 2
ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 2
DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource): 2
RowData (org.apache.flink.table.data.RowData): 2
AsyncTableFunction (org.apache.flink.table.functions.AsyncTableFunction): 2
LookupRuntimeProviderContext (org.apache.flink.table.runtime.connector.source.LookupRuntimeProviderContext): 2
RowType (org.apache.flink.table.types.logical.RowType): 2
Test (org.junit.Test): 2
Duration (java.time.Duration): 1
ArrayList (java.util.ArrayList): 1
List (java.util.List): 1
Optional (java.util.Optional): 1
RelOptTable (org.apache.calcite.plan.RelOptTable): 1
VisibleForTesting (org.apache.flink.annotation.VisibleForTesting): 1
Transformation (org.apache.flink.api.dag.Transformation): 1
Configuration (org.apache.flink.configuration.Configuration): 1