Example 31 with RowType

Use of org.apache.flink.table.types.logical.RowType in the Apache Flink project.

From class JsonRowDataSerDeSchemaTest, method testDeserializationWithTypesMismatch.

@Test
public void testDeserializationWithTypesMismatch() {
    RowType rowType = (RowType) ROW(FIELD("f0", STRING()), FIELD("f1", INT())).getLogicalType();
    String json = "{\"f0\":\"abc\", \"f1\": \"abc\"}";
    JsonRowDataDeserializationSchema deserializationSchema =
            new JsonRowDataDeserializationSchema(
                    rowType, InternalTypeInfo.of(rowType), false, false, TimestampFormat.SQL);
    String errorMessage = "Fail to deserialize at field: f1.";
    try {
        deserializationSchema.deserialize(json.getBytes());
        fail("expecting exception message: " + errorMessage);
    } catch (Throwable t) {
        assertThat(t, FlinkMatchers.containsMessage(errorMessage));
    }
}
Also used : RowType(org.apache.flink.table.types.logical.RowType) Test(org.junit.Test)
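
For contrast, a minimal happy-path sketch (not from the Flink sources) that reuses the constructor arguments shown in the test above; the JSON literal is assumed to match the schema:

RowType rowType = (RowType) ROW(FIELD("f0", STRING()), FIELD("f1", INT())).getLogicalType();
JsonRowDataDeserializationSchema schema =
        new JsonRowDataDeserializationSchema(
                rowType, InternalTypeInfo.of(rowType), false, false, TimestampFormat.SQL);
// deserialize declares IOException, so the caller handles or propagates it
RowData row = schema.deserialize("{\"f0\": \"abc\", \"f1\": 123}".getBytes());
// row.getString(0).toString() -> "abc"; row.getInt(1) -> 123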

Example 32 with RowType

Use of org.apache.flink.table.types.logical.RowType in the Apache Flink project.

From class OrcColumnarRowInputFormat, method createPartitionedFormat.

/**
 * Creates a partitioned {@link OrcColumnarRowInputFormat}; the partition columns can be
 * generated from the split.
 */
public static <SplitT extends FileSourceSplit>
        OrcColumnarRowInputFormat<VectorizedRowBatch, SplitT> createPartitionedFormat(
                OrcShim<VectorizedRowBatch> shim,
                Configuration hadoopConfig,
                RowType tableType,
                List<String> partitionKeys,
                PartitionFieldExtractor<SplitT> extractor,
                int[] selectedFields,
                List<OrcFilters.Predicate> conjunctPredicates,
                int batchSize,
                Function<RowType, TypeInformation<RowData>> rowTypeInfoFactory) {
    // TODO FLINK-25113 all this partition keys code should be pruned from the orc format,
    // because now FileSystemTableSource uses FileInfoExtractorBulkFormat for reading partition
    // keys.
    String[] tableFieldNames = tableType.getFieldNames().toArray(new String[0]);
    LogicalType[] tableFieldTypes = tableType.getChildren().toArray(new LogicalType[0]);
    List<String> orcFieldNames = getNonPartNames(tableFieldNames, partitionKeys);
    int[] orcSelectedFields = getSelectedOrcFields(tableFieldNames, selectedFields, orcFieldNames);
    ColumnBatchFactory<VectorizedRowBatch, SplitT> batchGenerator = (SplitT split, VectorizedRowBatch rowBatch) -> {
        // create and initialize the row batch
        ColumnVector[] vectors = new ColumnVector[selectedFields.length];
        for (int i = 0; i < vectors.length; i++) {
            String name = tableFieldNames[selectedFields[i]];
            LogicalType type = tableFieldTypes[selectedFields[i]];
        vectors[i] = partitionKeys.contains(name)
                ? createFlinkVectorFromConstant(
                        type, extractor.extract(split, name, type), batchSize)
                : createFlinkVector(rowBatch.cols[orcFieldNames.indexOf(name)], type);
        }
        return new VectorizedColumnBatch(vectors);
    };
    return new OrcColumnarRowInputFormat<>(
            shim,
            hadoopConfig,
            convertToOrcTypeWithPart(tableFieldNames, tableFieldTypes, partitionKeys),
            orcSelectedFields,
            conjunctPredicates,
            batchSize,
            batchGenerator,
            rowTypeInfoFactory.apply(
                    new RowType(
                            Arrays.stream(selectedFields)
                                    .mapToObj(i -> tableType.getFields().get(i))
                                    .collect(Collectors.toList()))));
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) VectorizedColumnBatch(org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType)
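
A hypothetical call site for createPartitionedFormat, sketched under the assumptions that OrcShim.defaultShim() provides the shim and that "dt" is the only partition column; the schema, batch size, and default-partition name are illustrative:

RowType tableType = RowType.of(
        new LogicalType[] {
                new VarCharType(VarCharType.MAX_LENGTH), new IntType(),
                new VarCharType(VarCharType.MAX_LENGTH)
        },
        new String[] {"name", "age", "dt"});
OrcColumnarRowInputFormat<VectorizedRowBatch, FileSourceSplit> format =
        OrcColumnarRowInputFormat.createPartitionedFormat(
                OrcShim.defaultShim(),                  // assumption: default ORC shim factory
                new Configuration(),                    // Hadoop configuration
                tableType,
                Collections.singletonList("dt"),        // partition key, filled from the split
                PartitionFieldExtractor.forFileSystem("__DEFAULT_PARTITION__"),
                new int[] {0, 1, 2},                    // project all three fields
                Collections.emptyList(),                // no predicates pushed down
                2048,                                   // batch size
                InternalTypeInfo::of);                  // rowTypeInfoFactory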

Example 33 with RowType

Use of org.apache.flink.table.types.logical.RowType in the Apache Flink project.

From class OrcSplitReaderUtil, method logicalTypeToOrcType.

/**
 * See {@code org.apache.flink.table.catalog.hive.util.HiveTypeUtil}.
 */
public static TypeDescription logicalTypeToOrcType(LogicalType type) {
    type = type.copy(true);
    switch(type.getTypeRoot()) {
        case CHAR:
            return TypeDescription.createChar().withMaxLength(((CharType) type).getLength());
        case VARCHAR:
            int len = ((VarCharType) type).getLength();
            if (len == VarCharType.MAX_LENGTH) {
                return TypeDescription.createString();
            } else {
                return TypeDescription.createVarchar().withMaxLength(len);
            }
        case BOOLEAN:
            return TypeDescription.createBoolean();
        case VARBINARY:
            if (type.equals(DataTypes.BYTES().getLogicalType())) {
                return TypeDescription.createBinary();
            } else {
                throw new UnsupportedOperationException("Not support other binary type: " + type);
            }
        case DECIMAL:
            DecimalType decimalType = (DecimalType) type;
            return TypeDescription.createDecimal().withScale(decimalType.getScale()).withPrecision(decimalType.getPrecision());
        case TINYINT:
            return TypeDescription.createByte();
        case SMALLINT:
            return TypeDescription.createShort();
        case INTEGER:
            return TypeDescription.createInt();
        case BIGINT:
            return TypeDescription.createLong();
        case FLOAT:
            return TypeDescription.createFloat();
        case DOUBLE:
            return TypeDescription.createDouble();
        case DATE:
            return TypeDescription.createDate();
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            return TypeDescription.createTimestamp();
        case ARRAY:
            ArrayType arrayType = (ArrayType) type;
            return TypeDescription.createList(logicalTypeToOrcType(arrayType.getElementType()));
        case MAP:
            MapType mapType = (MapType) type;
            return TypeDescription.createMap(logicalTypeToOrcType(mapType.getKeyType()), logicalTypeToOrcType(mapType.getValueType()));
        case ROW:
            RowType rowType = (RowType) type;
            TypeDescription struct = TypeDescription.createStruct();
            for (int i = 0; i < rowType.getFieldCount(); i++) {
                struct.addField(rowType.getFieldNames().get(i), logicalTypeToOrcType(rowType.getChildren().get(i)));
            }
            return struct;
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
Also used : ArrayType(org.apache.flink.table.types.logical.ArrayType) DecimalType(org.apache.flink.table.types.logical.DecimalType) RowType(org.apache.flink.table.types.logical.RowType) TypeDescription(org.apache.orc.TypeDescription) VarCharType(org.apache.flink.table.types.logical.VarCharType) MapType(org.apache.flink.table.types.logical.MapType)
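
A minimal sketch (not from the Flink sources) of calling logicalTypeToOrcType on a ROW type; the expected schema string follows from the VARCHAR, DECIMAL, and ROW branches above:

RowType rowType = RowType.of(
        new LogicalType[] {new VarCharType(VarCharType.MAX_LENGTH), new DecimalType(10, 2)},
        new String[] {"name", "price"});
TypeDescription orcSchema = OrcSplitReaderUtil.logicalTypeToOrcType(rowType);
// VARCHAR with MAX_LENGTH maps to ORC string, DECIMAL(10, 2) to decimal(10,2):
// orcSchema.toString() -> "struct<name:string,price:decimal(10,2)>"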

Example 34 with RowType

Use of org.apache.flink.table.types.logical.RowType in the Apache Flink project.

From class HiveTableSink, method createBulkWriterFactory.

private Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(String[] partitionColumns, StorageDescriptor sd) {
    String serLib = sd.getSerdeInfo().getSerializationLib().toLowerCase();
    int formatFieldCount = tableSchema.getFieldCount() - partitionColumns.length;
    String[] formatNames = new String[formatFieldCount];
    LogicalType[] formatTypes = new LogicalType[formatFieldCount];
    for (int i = 0; i < formatFieldCount; i++) {
        formatNames[i] = tableSchema.getFieldName(i).get();
        formatTypes[i] = tableSchema.getFieldDataType(i).get().getLogicalType();
    }
    RowType formatType = RowType.of(formatTypes, formatNames);
    if (serLib.contains("parquet")) {
        Configuration formatConf = new Configuration(jobConf);
        sd.getSerdeInfo().getParameters().forEach(formatConf::set);
        return Optional.of(ParquetRowDataBuilder.createWriterFactory(formatType, formatConf, hiveVersion.startsWith("3.")));
    } else if (serLib.contains("orc")) {
        Configuration formatConf = new ThreadLocalClassLoaderConfiguration(jobConf);
        sd.getSerdeInfo().getParameters().forEach(formatConf::set);
        TypeDescription typeDescription = OrcSplitReaderUtil.logicalTypeToOrcType(formatType);
        return Optional.of(hiveShim.createOrcBulkWriterFactory(formatConf, typeDescription.toString(), formatTypes));
    } else {
        return Optional.empty();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ThreadLocalClassLoaderConfiguration(org.apache.flink.orc.writer.ThreadLocalClassLoaderConfiguration) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) TypeDescription(org.apache.orc.TypeDescription)
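
The Parquet branch in isolation, as a hedged sketch: the schema and configuration are illustrative, and the third argument is assumed to be the UTC-timestamp flag that hiveVersion.startsWith("3.") supplies above:

RowType formatType = RowType.of(
        new LogicalType[] {new VarCharType(VarCharType.MAX_LENGTH), new IntType()},
        new String[] {"name", "age"});
Configuration formatConf = new Configuration();
BulkWriter.Factory<RowData> factory =
        ParquetRowDataBuilder.createWriterFactory(formatType, formatConf, /* utcTimestamp */ false);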

Example 35 with RowType

Use of org.apache.flink.table.types.logical.RowType in the Apache Flink project.

From class ParquetColumnarRowInputFormatTest, method innerTestPartitionValues.

private void innerTestPartitionValues(Path testPath, List<String> partitionKeys, boolean nullPartValue) throws IOException {
    LogicalType[] fieldTypes = new LogicalType[] {
            new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(), new TinyIntType(),
            new SmallIntType(), new IntType(), new BigIntType(), new FloatType(),
            new DoubleType(), new TimestampType(9), new DecimalType(5, 0),
            new DecimalType(15, 0), new DecimalType(20, 0), new DecimalType(5, 0),
            new DecimalType(15, 0), new DecimalType(20, 0), new BooleanType(),
            new DateType(), new TimestampType(9), new DoubleType(), new TinyIntType(),
            new SmallIntType(), new IntType(), new BigIntType(), new FloatType(),
            new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0),
            new VarCharType(VarCharType.MAX_LENGTH)
    };
    RowType rowType = RowType.of(
            fieldTypes, IntStream.range(0, 28).mapToObj(i -> "f" + i).toArray(String[]::new));
    int[] projected = new int[] { 7, 2, 4, 15, 19, 20, 21, 22, 23, 18, 16, 17, 24, 25, 26, 27 };
    RowType producedType = new RowType(
            Arrays.stream(projected)
                    .mapToObj(i -> rowType.getFields().get(i))
                    .collect(Collectors.toList()));
    ParquetColumnarRowInputFormat<FileSourceSplit> format =
            ParquetColumnarRowInputFormat.createPartitionedFormat(
                    new Configuration(),
                    producedType,
                    InternalTypeInfo.of(producedType),
                    partitionKeys,
                    PartitionFieldExtractor.forFileSystem("my_default_value"),
                    500,
                    false,
                    true);
    FileStatus fileStatus = testPath.getFileSystem().getFileStatus(testPath);
    AtomicInteger cnt = new AtomicInteger(0);
    forEachRemaining(
            format.createReader(
                    EMPTY_CONF,
                    new FileSourceSplit(
                            "id", testPath, 0, Long.MAX_VALUE,
                            fileStatus.getModificationTime(), fileStatus.getLen())),
            row -> {
        int i = cnt.get();
        // common values
        assertEquals(i, row.getDouble(0), 0);
        assertEquals((byte) i, row.getByte(1));
        assertEquals(i, row.getInt(2));
        // partition values
        if (nullPartValue) {
            for (int j = 3; j < 16; j++) {
                assertTrue(row.isNullAt(j));
            }
        } else {
            assertTrue(row.getBoolean(3));
            assertEquals(9, row.getByte(4));
            assertEquals(10, row.getShort(5));
            assertEquals(11, row.getInt(6));
            assertEquals(12, row.getLong(7));
            assertEquals(13, row.getFloat(8), 0);
            assertEquals(6.6, row.getDouble(9), 0);
            assertEquals(DateTimeUtils.toInternal(Date.valueOf("2020-11-23")), row.getInt(10));
            assertEquals(LocalDateTime.of(1999, 1, 1, 1, 1), row.getTimestamp(11, 9).toLocalDateTime());
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(24), 5, 0), row.getDecimal(12, 5, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(25), 15, 0), row.getDecimal(13, 15, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(26), 20, 0), row.getDecimal(14, 20, 0));
            assertEquals("f27", row.getString(15).toString());
        }
        cnt.incrementAndGet();
    });
}
Also used : FileStatus(org.apache.flink.core.fs.FileStatus) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) Configuration(org.apache.hadoop.conf.Configuration) BooleanType(org.apache.flink.table.types.logical.BooleanType) LogicalType(org.apache.flink.table.types.logical.LogicalType) BigIntType(org.apache.flink.table.types.logical.BigIntType) RowType(org.apache.flink.table.types.logical.RowType) BigDecimal(java.math.BigDecimal) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) FloatType(org.apache.flink.table.types.logical.FloatType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DoubleType(org.apache.flink.table.types.logical.DoubleType) TimestampType(org.apache.flink.table.types.logical.TimestampType) DecimalType(org.apache.flink.table.types.logical.DecimalType) VarCharType(org.apache.flink.table.types.logical.VarCharType) DateType(org.apache.flink.table.types.logical.DateType)
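
The projection idiom from this test, isolated as a small sketch: selected field indices are mapped to the corresponding RowType fields to build the produced row type.

RowType full = RowType.of(
        new LogicalType[] {new VarCharType(VarCharType.MAX_LENGTH), new IntType(), new DoubleType()},
        new String[] {"f0", "f1", "f2"});
int[] projected = new int[] {2, 0};
RowType producedType = new RowType(
        Arrays.stream(projected)
                .mapToObj(i -> full.getFields().get(i))
                .collect(Collectors.toList()));
// producedType.getFieldNames() -> [f2, f0]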

Aggregations

RowType (org.apache.flink.table.types.logical.RowType) 212
RowData (org.apache.flink.table.data.RowData) 108
LogicalType (org.apache.flink.table.types.logical.LogicalType) 59
DataType (org.apache.flink.table.types.DataType) 57
Transformation (org.apache.flink.api.dag.Transformation) 50
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) 46
TableException (org.apache.flink.table.api.TableException) 37
Test (org.junit.Test) 36
GenericRowData (org.apache.flink.table.data.GenericRowData) 33
ArrayList (java.util.ArrayList) 28
List (java.util.List) 28
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation) 26
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector) 25
CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext) 22
TableConfig (org.apache.flink.table.api.TableConfig) 19
ArrayType (org.apache.flink.table.types.logical.ArrayType) 19
TimestampType (org.apache.flink.table.types.logical.TimestampType) 19
DecimalType (org.apache.flink.table.types.logical.DecimalType) 17
Collections (java.util.Collections) 16
AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList) 16