Search in sources :

Example 56 with GenericRowData

use of org.apache.flink.table.data.GenericRowData in project flink by apache.

the class OggJsonDeserializationSchema method emitRow.

// --------------------------------------------------------------------------------------------
/**
 * Emits {@code physicalRow} to {@code out}, appending metadata columns when metadata is
 * requested.
 *
 * <p>Without metadata the physical row is forwarded unchanged. Otherwise a new row with the
 * same row kind is built: the physical fields come first, followed by one field per metadata
 * converter, each computed from {@code rootRow}.
 *
 * @param rootRow row handed to every metadata converter
 * @param physicalRow row carrying the physical columns and the row kind to emit
 * @param out collector receiving the produced row
 */
private void emitRow(GenericRowData rootRow, GenericRowData physicalRow, Collector<RowData> out) {
    if (hasMetadata) {
        final int physicalCount = physicalRow.getArity();
        final int metadataCount = metadataConverters.length;
        final GenericRowData joined = new GenericRowData(physicalRow.getRowKind(), physicalCount + metadataCount);
        // single write cursor: physical fields first, metadata fields right behind them
        int target = 0;
        for (; target < physicalCount; target++) {
            joined.setField(target, physicalRow.getField(target));
        }
        for (int source = 0; source < metadataCount; source++, target++) {
            joined.setField(target, metadataConverters[source].convert(rootRow));
        }
        out.collect(joined);
    } else {
        // no output projection required, forward the physical row as is
        out.collect(physicalRow);
    }
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData)

Example 57 with GenericRowData

use of org.apache.flink.table.data.GenericRowData in project flink by apache.

the class OggJsonDeserializationSchema method deserialize.

/**
 * Deserializes one Ogg JSON change message and emits the resulting row(s) to {@code out}.
 *
 * <p>The decoded record is read positionally: field 0 is the "before" row, field 1 the "after"
 * row and field 2 the operation type. A create emits the "after" row as INSERT, an update emits
 * "before" as UPDATE_BEFORE followed by "after" as UPDATE_AFTER, and a delete emits "before" as
 * DELETE. Every row goes through {@code emitRow}.
 *
 * @param message raw message bytes; {@code null} or empty input is skipped
 * @param out collector receiving the produced rows
 * @throws IOException if processing fails and {@code ignoreParseErrors} is not set
 */
@Override
public void deserialize(byte[] message, Collector<RowData> out) throws IOException {
    if (message == null || message.length == 0) {
        // skip tombstone messages
        return;
    }
    try {
        GenericRowData row = (GenericRowData) jsonDeserializer.deserialize(message);
        GenericRowData before = (GenericRowData) row.getField(0);
        GenericRowData after = (GenericRowData) row.getField(1);
        String op = row.getField(2).toString();
        if (OP_CREATE.equals(op)) {
            after.setRowKind(RowKind.INSERT);
            emitRow(row, after, out);
        } else if (OP_UPDATE.equals(op)) {
            if (before == null) {
                throw new IllegalStateException(String.format(REPLICA_IDENTITY_EXCEPTION, "UPDATE"));
            }
            before.setRowKind(RowKind.UPDATE_BEFORE);
            after.setRowKind(RowKind.UPDATE_AFTER);
            emitRow(row, before, out);
            emitRow(row, after, out);
        } else if (OP_DELETE.equals(op)) {
            if (before == null) {
                throw new IllegalStateException(String.format(REPLICA_IDENTITY_EXCEPTION, "DELETE"));
            }
            before.setRowKind(RowKind.DELETE);
            emitRow(row, before, out);
        } else {
            if (!ignoreParseErrors) {
                // decode with an explicit charset so the message text does not depend on the
                // platform default encoding of the JVM
                throw new IOException(format("Unknown \"op_type\" value \"%s\". The Ogg JSON message is '%s'", op, new String(message, java.nio.charset.StandardCharsets.UTF_8)));
            }
        }
    } catch (Throwable t) {
        // a big try catch to protect the processing; it also catches the exceptions thrown
        // above (missing "before" row, unknown op type) and wraps them as the cause.
        if (!ignoreParseErrors) {
            throw new IOException(format("Corrupt Ogg JSON message '%s'.", new String(message, java.nio.charset.StandardCharsets.UTF_8)), t);
        }
    }
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) IOException(java.io.IOException)

Example 58 with GenericRowData

use of org.apache.flink.table.data.GenericRowData in project flink by apache.

the class HiveInputFormatPartitionReaderITCase method testReadFormat.

/**
 * Reads the table prepared for {@code format} through a {@code HiveInputFormatPartitionReader}
 * and asserts that the number of rows read equals the number of rows returned by a
 * {@code select *} on the same table.
 *
 * @param tableEnv table environment used to prepare the data and run the reference query
 * @param hiveCatalog catalog providing the table schema, Hive table and Hive version
 * @param format format passed on to {@code prepareData}
 * @throws Exception if preparing, reading or querying the table fails
 */
private void testReadFormat(TableEnvironment tableEnv, HiveCatalog hiveCatalog, String format) throws Exception {
    final String tableName = prepareData(tableEnv, format);
    final ObjectPath tablePath = new ObjectPath("default", tableName);
    final TableSchema schema = hiveCatalog.getTable(tablePath).getSchema();

    // create partition reader
    final HiveInputFormatPartitionReader reader =
            new HiveInputFormatPartitionReader(
                    new Configuration(),
                    new JobConf(hiveCatalog.getHiveConf()),
                    hiveCatalog.getHiveVersion(),
                    tablePath,
                    schema.getFieldDataTypes(),
                    schema.getFieldNames(),
                    Collections.emptyList(),
                    null,
                    false);

    // create HiveTablePartition to read from
    final Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    final HiveTablePartition partition =
            new HiveTablePartition(
                    hiveTable.getSd(),
                    HiveReflectionUtils.getTableMetadata(
                            HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion()), hiveTable));
    reader.open(Collections.singletonList(partition));

    // this follows the way the partition reader is used during lookup join:
    // one reusable row is handed to every read call
    int rowsRead = 0;
    for (GenericRowData reuse = new GenericRowData(schema.getFieldCount()); reader.read(reuse) != null; ) {
        rowsRead++;
    }

    // reference count comes from a plain query over the same table
    final int expectedRows =
            CollectionUtil.iteratorToList(tableEnv.executeSql("select * from " + tableName).collect()).size();
    assertEquals(expectedRows, rowsRead);
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) HiveTablePartition(org.apache.flink.connectors.hive.HiveTablePartition) Table(org.apache.hadoop.hive.metastore.api.Table) TableSchema(org.apache.flink.table.api.TableSchema) Configuration(org.apache.flink.configuration.Configuration) GenericRowData(org.apache.flink.table.data.GenericRowData) JobConf(org.apache.hadoop.mapred.JobConf)

Example 59 with GenericRowData

use of org.apache.flink.table.data.GenericRowData in project flink by apache.

the class DebeziumAvroDeserializationSchema method deserialize.

/**
 * Deserializes one Debezium Avro change message and emits the resulting row(s) to {@code out}.
 *
 * <p>The decoded record is read positionally: field 0 is the "before" row, field 1 the "after"
 * row and field 2 the operation. Create and read operations emit "after" as INSERT, an update
 * emits "before" as UPDATE_BEFORE followed by "after" as UPDATE_AFTER, and a delete emits
 * "before" as DELETE. Any failure, including an unknown operation, surfaces as an
 * {@link IOException}.
 *
 * @param message raw message bytes; {@code null} or empty input is skipped
 * @param out collector receiving the produced rows
 * @throws IOException if the message cannot be deserialized or processed
 */
@Override
public void deserialize(byte[] message, Collector<RowData> out) throws IOException {
    final boolean tombstone = message == null || message.length == 0;
    if (tombstone) {
        // skip tombstone messages
        return;
    }
    try {
        final GenericRowData envelope = (GenericRowData) avroDeserializer.deserialize(message);
        final GenericRowData oldImage = (GenericRowData) envelope.getField(0);
        final GenericRowData newImage = (GenericRowData) envelope.getField(1);
        final String opCode = envelope.getField(2).toString();

        if (OP_CREATE.equals(opCode) || OP_READ.equals(opCode)) {
            newImage.setRowKind(RowKind.INSERT);
            out.collect(newImage);
            return;
        }

        if (OP_UPDATE.equals(opCode)) {
            if (oldImage == null) {
                throw new IllegalStateException(String.format(REPLICA_IDENTITY_EXCEPTION, "UPDATE"));
            }
            // both row kinds are set before anything is emitted
            oldImage.setRowKind(RowKind.UPDATE_BEFORE);
            newImage.setRowKind(RowKind.UPDATE_AFTER);
            out.collect(oldImage);
            out.collect(newImage);
            return;
        }

        if (OP_DELETE.equals(opCode)) {
            if (oldImage == null) {
                throw new IllegalStateException(String.format(REPLICA_IDENTITY_EXCEPTION, "DELETE"));
            }
            oldImage.setRowKind(RowKind.DELETE);
            out.collect(oldImage);
            return;
        }

        throw new IOException(format("Unknown \"op\" value \"%s\". The Debezium Avro message is '%s'", op(opCode), new String(message)));
    } catch (Throwable t) {
        // a big try catch to protect the processing; the exceptions thrown above end up here
        // as well and become the cause of the reported IOException.
        throw new IOException("Can't deserialize Debezium Avro message.", t);
    }
}

/** Returns the operation code unchanged; keeps the error-message call site short. */
private static String op(String opCode) {
    return opCode;
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) IOException(java.io.IOException)

Example 60 with GenericRowData

use of org.apache.flink.table.data.GenericRowData in project flink by apache.

the class OrcBulkRowDataWriterTest method getResults.

/**
 * Reads every row batch from {@code reader} and converts each row into a four-field
 * {@link GenericRowData}.
 *
 * <p>Column 0 is read with {@code readStringData}, column 1 with {@code readInt}, column 2 with
 * {@code readList} and column 3 with {@code readMap}.
 *
 * <p>The record reader is closed exactly once, after the last batch or when reading fails.
 * Closing it inside the batch loop would make the loop condition run against an already closed
 * reader after the first batch, and would leave it open when no batch is read at all.
 *
 * @param reader reader to pull the row batches from
 * @return the rows in the order they were read
 * @throws IOException if reading a batch or closing the record reader fails
 */
private static List<RowData> getResults(Reader reader) throws IOException {
    List<RowData> results = new ArrayList<>();
    RecordReader recordReader = reader.rows();
    try {
        VectorizedRowBatch batch = reader.getSchema().createRowBatch();
        while (recordReader.nextBatch(batch)) {
            BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
            LongColumnVector intVector = (LongColumnVector) batch.cols[1];
            ListColumnVector listVector = (ListColumnVector) batch.cols[2];
            MapColumnVector mapVector = (MapColumnVector) batch.cols[3];
            for (int r = 0; r < batch.size; r++) {
                GenericRowData readRowData = new GenericRowData(4);
                readRowData.setField(0, readStringData(stringVector, r));
                readRowData.setField(1, readInt(intVector, r));
                readRowData.setField(2, readList(listVector, r));
                readRowData.setField(3, readMap(mapVector, r));
                results.add(readRowData);
            }
        }
    } finally {
        // release the reader once, regardless of how many batches were consumed
        recordReader.close();
    }
    return results;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) ListColumnVector(org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) RecordReader(org.apache.orc.RecordReader) ArrayList(java.util.ArrayList) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) GenericRowData(org.apache.flink.table.data.GenericRowData) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Aggregations

GenericRowData (org.apache.flink.table.data.GenericRowData)94 RowData (org.apache.flink.table.data.RowData)32 JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData)16 Test (org.junit.Test)14 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)13 RowType (org.apache.flink.table.types.logical.RowType)13 IOException (java.io.IOException)11 ArrayList (java.util.ArrayList)11 IntType (org.apache.flink.table.types.logical.IntType)11 List (java.util.List)9 LogicalType (org.apache.flink.table.types.logical.LogicalType)9 GenericArrayData (org.apache.flink.table.data.GenericArrayData)6 StringData (org.apache.flink.table.data.StringData)6 Arrays (java.util.Arrays)5 HashMap (java.util.HashMap)5 OutputStream (java.io.OutputStream)4 PrintStream (java.io.PrintStream)4 Collections (java.util.Collections)4 Random (java.util.Random)4 Consumer (java.util.function.Consumer)4