
Example 6 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class KinesisDynamicTableSinkFactoryTest method testGoodTableSinkForPartitionedTable.

@Test
public void testGoodTableSinkForPartitionedTable() {
    ResolvedSchema sinkSchema = defaultSinkSchema();
    DataType physicalDataType = sinkSchema.toPhysicalRowDataType();
    Map<String, String> sinkOptions = defaultTableOptions().build();
    List<String> sinkPartitionKeys = Arrays.asList("name", "curr_id");
    // Construct actual DynamicTableSink using FactoryUtil
    KinesisDynamicSink actualSink = (KinesisDynamicSink) createTableSink(sinkSchema, sinkPartitionKeys, sinkOptions);
    // Construct expected DynamicTableSink using factory under test
    KinesisDynamicSink expectedSink =
            (KinesisDynamicSink)
                    new KinesisDynamicSink.KinesisDynamicTableSinkBuilder()
                            .setConsumedDataType(physicalDataType)
                            .setStream(STREAM_NAME)
                            .setKinesisClientProperties(defaultProducerProperties())
                            .setEncodingFormat(new TestFormatFactory.EncodingFormatMock(","))
                            .setPartitioner(
                                    new RowDataFieldsKinesisPartitionKeyGenerator(
                                            (RowType) physicalDataType.getLogicalType(), sinkPartitionKeys))
                            .build();
    // verify that the constructed DynamicTableSink is as expected
    Assertions.assertThat(actualSink).isEqualTo(expectedSink);
    // verify the produced sink
    DynamicTableSink.SinkRuntimeProvider sinkFunctionProvider = actualSink.getSinkRuntimeProvider(new SinkRuntimeProviderContext(false));
    Sink<RowData> sinkFunction = ((SinkV2Provider) sinkFunctionProvider).createSink();
    Assertions.assertThat(sinkFunction).isInstanceOf(KinesisDataStreamsSink.class);
}
Also used : SinkRuntimeProviderContext(org.apache.flink.table.runtime.connector.sink.SinkRuntimeProviderContext) RowType(org.apache.flink.table.types.logical.RowType) DynamicTableSink(org.apache.flink.table.connector.sink.DynamicTableSink) RowData(org.apache.flink.table.data.RowData) DataType(org.apache.flink.table.types.DataType) SinkV2Provider(org.apache.flink.table.connector.sink.SinkV2Provider) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) Test(org.junit.Test)
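
For reference, the rows consumed by such a sink are internal RowData instances. A minimal sketch of building one with GenericRowData (the field values are hypothetical and only loosely mirror the "name" and "curr_id" partition keys above):

public void buildSampleRow() {
    // GenericRowData is the generic implementation of RowData; string fields
    // must be wrapped in StringData for Flink's internal data format.
    RowData row = GenericRowData.of(StringData.fromString("some-name"), 42L);
    // the arity matches the number of fields passed to of(...)
    System.out.println(row.getArity()); // 2
}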

Example 7 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class FileSystemLookupFunction method checkCacheReload.

private void checkCacheReload() {
    if (nextLoadTime > System.currentTimeMillis()) {
        return;
    }
    if (nextLoadTime > 0) {
        LOG.info("Lookup join cache has expired after {} minute(s), reloading", reloadInterval.toMinutes());
    } else {
        LOG.info("Populating lookup join cache");
    }
    int numRetry = 0;
    while (true) {
        cache.clear();
        try {
            long count = 0;
            GenericRowData reuse = new GenericRowData(rowType.getFieldCount());
            partitionReader.open(partitionFetcher.fetch(fetcherContext));
            RowData row;
            while ((row = partitionReader.read(reuse)) != null) {
                count++;
                RowData rowData = serializer.copy(row);
                RowData key = extractLookupKey(rowData);
                List<RowData> rows = cache.computeIfAbsent(key, k -> new ArrayList<>());
                rows.add(rowData);
            }
            partitionReader.close();
            nextLoadTime = System.currentTimeMillis() + reloadInterval.toMillis();
            LOG.info("Loaded {} row(s) into lookup join cache", count);
            return;
        } catch (Exception e) {
            if (numRetry >= MAX_RETRIES) {
                throw new FlinkRuntimeException(String.format("Failed to load table into cache after %d retries", numRetry), e);
            }
            numRetry++;
            long toSleep = numRetry * RETRY_INTERVAL.toMillis();
            LOG.warn(String.format("Failed to load table into cache, will retry in %d seconds", toSleep / 1000), e);
            try {
                Thread.sleep(toSleep);
            } catch (InterruptedException ex) {
                LOG.warn("Interrupted while waiting to retry failed cache load, aborting");
                throw new FlinkRuntimeException(ex);
            }
        }
    }
}
Also used : RowData(org.apache.flink.table.data.RowData) GenericRowData(org.apache.flink.table.data.GenericRowData) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException)
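
The cache populated above maps an extracted lookup key to every row sharing that key. A minimal sketch of the same grouping pattern in isolation (the key extraction below is a stand-in for extractLookupKey, and the single string key column at position 0 is an assumption):

private final Map<RowData, List<RowData>> cache = new HashMap<>();

public void addToCache(RowData row) {
    // project the join key column(s) into a separate row; here a single
    // string column at position 0 is assumed
    RowData key = GenericRowData.of(row.getString(0));
    // group rows by key, mirroring cache.computeIfAbsent(...) in checkCacheReload
    cache.computeIfAbsent(key, k -> new ArrayList<>()).add(row);
}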

Example 8 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class HiveLookupTableSource method getLookupFunction.

private TableFunction<RowData> getLookupFunction(int[] keys) {
    final String defaultPartitionName = JobConfUtils.getDefaultPartitionName(jobConf);
    PartitionFetcher.Context<HiveTablePartition> fetcherContext =
            new HiveTablePartitionFetcherContext(
                    tablePath,
                    hiveShim,
                    new JobConfWrapper(jobConf),
                    catalogTable.getPartitionKeys(),
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    configuration,
                    defaultPartitionName);
    final PartitionFetcher<HiveTablePartition> partitionFetcher;
    // avoid lambda capture
    final ObjectPath tableFullPath = tablePath;
    if (catalogTable.getPartitionKeys().isEmpty()) {
        // non-partitioned table, the fetcher fetches the partition which represents the given
        // table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            partValueList.add(context.getPartition(new ArrayList<>()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
            return partValueList;
        };
    } else if (isStreamingSource()) {
        // streaming-read partitioned table, the fetcher fetches the latest partition of the
        // given table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues = context.getComparablePartitionValueList();
            // fetch latest partitions for partitioned table
            if (comparablePartitionValues.size() > 0) {
                // sort in desc order
                comparablePartitionValues.sort((o1, o2) -> o2.getComparator().compareTo(o1.getComparator()));
                PartitionFetcher.Context.ComparablePartitionValue maxPartition = comparablePartitionValues.get(0);
                partValueList.add(context.getPartition((List<String>) maxPartition.getPartitionValue()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
            } else {
                throw new IllegalArgumentException(String.format("At least one partition is required when set '%s' to 'latest' in temporal join," + " but actual partition number is '%s' for hive table %s", STREAMING_SOURCE_PARTITION_INCLUDE.key(), comparablePartitionValues.size(), tableFullPath));
            }
            return partValueList;
        };
    } else {
        // bounded-read partitioned table, the fetcher fetches all partitions of the given
        // filesystem table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues = context.getComparablePartitionValueList();
            for (PartitionFetcher.Context.ComparablePartitionValue comparablePartitionValue : comparablePartitionValues) {
                partValueList.add(context.getPartition((List<String>) comparablePartitionValue.getPartitionValue()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
            }
            return partValueList;
        };
    }
    PartitionReader<HiveTablePartition, RowData> partitionReader =
            new HiveInputFormatPartitionReader(
                    flinkConf,
                    jobConf,
                    hiveVersion,
                    tablePath,
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    catalogTable.getPartitionKeys(),
                    projectedFields,
                    flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
    return new FileSystemLookupFunction<>(
            partitionFetcher,
            fetcherContext,
            partitionReader,
            (RowType) getProducedTableSchema().toRowDataType().getLogicalType(),
            keys,
            hiveTableReloadInterval);
}
Also used : HivePartitionUtils(org.apache.flink.connectors.hive.util.HivePartitionUtils) TableFunction(org.apache.flink.table.functions.TableFunction) PartitionReader(org.apache.flink.connector.file.table.PartitionReader) DataType(org.apache.flink.table.types.DataType) CatalogTable(org.apache.flink.table.catalog.CatalogTable) LoggerFactory(org.slf4j.LoggerFactory) STREAMING_SOURCE_PARTITION_INCLUDE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_INCLUDE) HiveInputFormatPartitionReader(org.apache.flink.connectors.hive.read.HiveInputFormatPartitionReader) JobConfUtils(org.apache.flink.connectors.hive.util.JobConfUtils) RowType(org.apache.flink.table.types.logical.RowType) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Partition(org.apache.hadoop.hive.metastore.api.Partition) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) ArrayList(java.util.ArrayList) LookupTableSource(org.apache.flink.table.connector.source.LookupTableSource) ReadableConfig(org.apache.flink.configuration.ReadableConfig) Duration(java.time.Duration) HivePartitionFetcherContextBase(org.apache.flink.connectors.hive.read.HivePartitionFetcherContextBase) RowData(org.apache.flink.table.data.RowData) Logger(org.slf4j.Logger) STREAMING_SOURCE_CONSUME_START_OFFSET(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_CONSUME_START_OFFSET) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) Configuration(org.apache.flink.configuration.Configuration) Preconditions(org.apache.flink.util.Preconditions) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) JobConf(org.apache.hadoop.mapred.JobConf) LOOKUP_JOIN_CACHE_TTL(org.apache.flink.connectors.hive.HiveOptions.LOOKUP_JOIN_CACHE_TTL) List(java.util.List) Optional(java.util.Optional) TableFunctionProvider(org.apache.flink.table.connector.source.TableFunctionProvider) STREAMING_SOURCE_MONITOR_INTERVAL(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_MONITOR_INTERVAL) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
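
PartitionFetcher is a functional interface, so further fetch strategies can be plugged in with the same lambda shape as the three above. A hypothetical sketch of a fetcher that keeps only partitions whose first partition value equals a fixed literal (the method name and the literal are assumptions, the Context calls are the ones used above):

private PartitionFetcher<HiveTablePartition> createFixedValueFetcher(String firstPartitionValue) {
    return context -> {
        List<HiveTablePartition> partValueList = new ArrayList<>();
        for (PartitionFetcher.Context.ComparablePartitionValue value : context.getComparablePartitionValueList()) {
            List<String> partitionValues = (List<String>) value.getPartitionValue();
            // keep only partitions whose first value matches the requested literal
            if (!partitionValues.isEmpty() && firstPartitionValue.equals(partitionValues.get(0))) {
                context.getPartition(partitionValues).ifPresent(partValueList::add);
            }
        }
        return partValueList;
    };
}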

Example 9 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class HBaseRowDataAsyncLookupFunction method eval.

/**
 * The invoke entry point of the lookup function.
 *
 * @param future the future that is completed with the result or an exception.
 * @param rowKey the lookup key. Currently only a single rowkey is supported.
 */
public void eval(CompletableFuture<Collection<RowData>> future, Object rowKey) {
    int currentRetry = 0;
    if (cache != null) {
        RowData cacheRowData = cache.getIfPresent(rowKey);
        if (cacheRowData != null) {
            if (cacheRowData.getArity() == 0) {
                future.complete(Collections.emptyList());
            } else {
                future.complete(Collections.singletonList(cacheRowData));
            }
            return;
        }
    }
    // fetch result
    fetchResult(future, currentRetry, rowKey);
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData)
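
A caller drives this asynchronous lookup by passing in a CompletableFuture and reacting once it completes. A minimal sketch (the lookupFunction instance and the key value 1001 are assumptions):

public void lookupByKey(HBaseRowDataAsyncLookupFunction lookupFunction) {
    CompletableFuture<Collection<RowData>> future = new CompletableFuture<>();
    // the function completes the future from the cache or from the async fetch path
    lookupFunction.eval(future, 1001);
    future.whenComplete((rows, error) -> {
        if (error != null) {
            // the fetch path completed the future exceptionally after exhausting retries
            error.printStackTrace();
        } else if (rows.isEmpty()) {
            System.out.println("no row found for the given key");
        } else {
            rows.forEach(row -> System.out.println("matched row with arity " + row.getArity()));
        }
    });
}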

Example 10 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class HBaseSerdeTest method convertToNewRowTest.

@Test
public void convertToNewRowTest() {
    HBaseSerde serde = createHBaseSerde();
    List<List<Cell>> cellsList = prepareCells();
    List<RowData> resultRowDatas = new ArrayList<>();
    List<String> resultRowDataStr = new ArrayList<>();
    for (List<Cell> cells : cellsList) {
        RowData row = serde.convertToNewRow(Result.create(cells));
        resultRowDatas.add(row);
        resultRowDataStr.add(row.toString());
    }
    // this verifies RowData is not reused
    assertFalse(resultRowDatas.get(0) == resultRowDatas.get(1));
    List<String> expected = new ArrayList<>();
    expected.add("+I(1,+I(10),+I(Hello-1,100),+I(1.01,false,Welt-1))");
    expected.add("+I(2,+I(20),+I(Hello-2,200),+I(2.02,true,Welt-2))");
    assertEquals(expected, resultRowDataStr);
}
Also used : RowData(org.apache.flink.table.data.RowData) ArrayList(java.util.ArrayList) List(java.util.List) Cell(org.apache.hadoop.hbase.Cell) Test(org.junit.Test)
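
The expected strings in this test come from RowData's string form: GenericRowData prints the row kind ("+I" for an insert row) followed by the field values, nesting inner rows the same way. A small illustrative sketch (the values are hypothetical):

public void printRowString() {
    // an insert-kind row with one int field and one nested row field
    RowData nested = GenericRowData.of(1, GenericRowData.of(10));
    // expected to print: +I(1,+I(10))
    System.out.println(nested);
}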

Aggregations

RowData (org.apache.flink.table.data.RowData): 602
Test (org.junit.Test): 201
GenericRowData (org.apache.flink.table.data.GenericRowData): 178
ArrayList (java.util.ArrayList): 109
RowType (org.apache.flink.table.types.logical.RowType): 105
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 90
Watermark (org.apache.flink.streaming.api.watermark.Watermark): 84
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 72
Transformation (org.apache.flink.api.dag.Transformation): 70
Configuration (org.apache.flink.configuration.Configuration): 68
BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 67
List (java.util.List): 65
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 54
DataType (org.apache.flink.table.types.DataType): 52
Map (java.util.Map): 42
LogicalType (org.apache.flink.table.types.logical.LogicalType): 41
TableException (org.apache.flink.table.api.TableException): 34
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 33
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector): 32
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 31