Search in sources :

Example 16 with OrcColumnId

use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.

the class MapColumnWriter method finishRowGroup.

/**
 * Finishes the current row group for this map column and for its key and value writers.
 *
 * <p>A statistics entry is created from the current {@code nonNullValueCount}, appended to
 * {@code rowGroupColumnStatistics}, and the counter is reset to zero. The returned map holds
 * that entry under this writer's {@code columnId} plus every entry reported by
 * {@code keyWriter} and {@code valueWriter} for the same row group.
 *
 * <p>The {@code checkState} precondition fails when {@code closed} is set.
 *
 * @return statistics keyed by column id for this column and the nested key/value columns
 */
@Override
public Map<OrcColumnId, ColumnStatistics> finishRowGroup() {
    checkState(!closed);

    // Only the leading value count is supplied; the second argument is 0 and all remaining slots are null.
    ColumnStatistics mapStatistics = new ColumnStatistics((long) nonNullValueCount, 0, null, null, null, null, null, null, null, null, null);
    rowGroupColumnStatistics.add(mapStatistics);
    // Restart the count for the next row group.
    nonNullValueCount = 0;

    // Own entry first, then the key writer's entries, then the value writer's entries.
    return ImmutableMap.<OrcColumnId, ColumnStatistics>builder()
            .put(columnId, mapStatistics)
            .putAll(keyWriter.finishRowGroup())
            .putAll(valueWriter.finishRowGroup())
            .buildOrThrow();
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) OrcColumnId(io.trino.orc.metadata.OrcColumnId) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 17 with OrcColumnId

use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.

the class TypeConverter method toOrcListType.

/**
 * Produces the ORC types for an Iceberg list type.
 *
 * <p>The first element of the result is the {@code LIST} type itself, whose single child
 * (named {@code "item"}) refers to column id {@code nextFieldTypeIndex + 1}. It is followed
 * by the types returned by {@code toOrcType} for the list's element type.
 *
 * @param nextFieldTypeIndex index value from which the element's column id is derived (element id is this value plus one)
 * @param listType the Iceberg list type to convert
 * @param attributes attributes attached to the {@code LIST} type entry
 * @return a new mutable list: the {@code LIST} type followed by the element's types
 */
private static List<OrcType> toOrcListType(int nextFieldTypeIndex, Types.ListType listType, Map<String, String> attributes) {
    // The element type is placed at the index right after the one passed in.
    int elementTypeIndex = nextFieldTypeIndex + 1;

    // The element carries its Iceberg field id and required flag as string attributes.
    ImmutableMap.Builder<String, String> elementAttributeBuilder = ImmutableMap.builder();
    elementAttributeBuilder.put(ORC_ICEBERG_ID_KEY, Integer.toString(listType.elementId()));
    elementAttributeBuilder.put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(listType.isElementRequired()));
    Map<String, String> elementAttributes = elementAttributeBuilder.buildOrThrow();

    List<OrcType> elementTypes = toOrcType(elementTypeIndex, listType.elementType(), elementAttributes);

    OrcType listOrcType = new OrcType(
            OrcTypeKind.LIST,
            ImmutableList.of(new OrcColumnId(elementTypeIndex)),
            ImmutableList.of("item"),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            attributes);

    List<OrcType> result = new ArrayList<>();
    result.add(listOrcType);
    result.addAll(elementTypes);
    return result;
}
Also used : OrcColumnId(io.trino.orc.metadata.OrcColumnId) OrcType(io.trino.orc.metadata.OrcType) ArrayList(java.util.ArrayList)

Example 18 with OrcColumnId

use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.

the class TypeConverter method toOrcStructType.

/**
 * Produces the ORC types for an Iceberg struct type.
 *
 * <p>The first element of the result is the {@code STRUCT} type itself, listing one child
 * column id and name per struct field. It is followed by each field's types, in field order,
 * as returned by {@code toOrcType}. The first field is assigned index
 * {@code nextFieldTypeIndex + 1}; each following field starts after all the types produced
 * for the previous one.
 *
 * @param nextFieldTypeIndex index value from which the first field's column id is derived (first field id is this value plus one)
 * @param structType the Iceberg struct type to convert
 * @param attributes attributes attached to the {@code STRUCT} type entry
 * @return an immutable list: the {@code STRUCT} type followed by all field types
 */
private static List<OrcType> toOrcStructType(int nextFieldTypeIndex, Types.StructType structType, Map<String, String> attributes) {
    // Running index of the next child type; starts right after the index passed in.
    int childTypeIndex = nextFieldTypeIndex + 1;

    List<OrcColumnId> childColumnIds = new ArrayList<>();
    List<String> childNames = new ArrayList<>();
    List<OrcType> flattenedChildTypes = new ArrayList<>();

    for (Types.NestedField child : structType.fields()) {
        childColumnIds.add(new OrcColumnId(childTypeIndex));
        childNames.add(child.name());

        // Each field carries its Iceberg field id and required flag as string attributes.
        ImmutableMap.Builder<String, String> childAttributeBuilder = ImmutableMap.builder();
        childAttributeBuilder.put(ORC_ICEBERG_ID_KEY, Integer.toString(child.fieldId()));
        childAttributeBuilder.put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(child.isRequired()));
        Map<String, String> childAttributes = childAttributeBuilder.buildOrThrow();

        List<OrcType> childTypes = toOrcType(childTypeIndex, child.type(), childAttributes);
        flattenedChildTypes.addAll(childTypes);

        // A field may expand to several types; skip past all of them.
        childTypeIndex += childTypes.size();
    }

    OrcType structOrcType = new OrcType(
            OrcTypeKind.STRUCT,
            childColumnIds,
            childNames,
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            attributes);

    return ImmutableList.<OrcType>builder()
            .add(structOrcType)
            .addAll(flattenedChildTypes)
            .build();
}
Also used : OrcColumnId(io.trino.orc.metadata.OrcColumnId) Types(org.apache.iceberg.types.Types) StandardTypes(io.trino.spi.type.StandardTypes) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) OrcType(io.trino.orc.metadata.OrcType) List(java.util.List)

Example 19 with OrcColumnId

use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.

the class OrcWriteValidation method validateColumnStatisticsEquivalent.

/**
 * Checks that two sets of per-column statistics agree column by column.
 *
 * <p>The two collections must have the same size. Columns are then visited by index, from
 * 0 up to that size, and each pair is passed to the single-column overload of this method
 * with the label {@code name + " column " + index}.
 *
 * @param orcDataSourceId data source identifier passed to any raised exception and to the per-column check
 * @param name label used in error messages; must not be null
 * @param actualColumnStatistics statistics to check; must not be null
 * @param expectedColumnStatistics statistics to check against; must not be null
 * @throws OrcCorruptionException if the sizes differ (the per-column check declares the same exception)
 */
private static void validateColumnStatisticsEquivalent(OrcDataSourceId orcDataSourceId, String name, ColumnMetadata<ColumnStatistics> actualColumnStatistics, ColumnMetadata<ColumnStatistics> expectedColumnStatistics) throws OrcCorruptionException {
    requireNonNull(name, "name is null");
    requireNonNull(actualColumnStatistics, "actualColumnStatistics is null");
    requireNonNull(expectedColumnStatistics, "expectedColumnStatistics is null");

    int columnCount = actualColumnStatistics.size();
    if (columnCount != expectedColumnStatistics.size()) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected number of columns in %s statistics", name);
    }

    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        OrcColumnId columnId = new OrcColumnId(columnIndex);
        String columnLabel = name + " column " + columnIndex;
        validateColumnStatisticsEquivalent(
                orcDataSourceId,
                columnLabel,
                actualColumnStatistics.get(columnId),
                expectedColumnStatistics.get(columnId));
    }
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) OrcColumnId(io.trino.orc.metadata.OrcColumnId)

Example 20 with OrcColumnId

use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.

the class OrcWriteValidation method validateRowGroupStatistics.

/**
 * Validates the row group statistics of one stripe against the statistics held in
 * {@code rowGroupStatistics} for the same stripe offset.
 *
 * <p>Checks, in order:
 * <ol>
 * <li>an expected entry exists for {@code stripeOffset};</li>
 * <li>every stream in {@code actualRowGroupStatistics} has exactly as many row groups as expected;</li>
 * <li>for each row group whose validation mode is not {@code HASHED}: the set of actual column ids
 *     equals the expected set, and each column's statistics pass the per-column equivalence check;</li>
 * <li>for each row group whose validation mode is not {@code DETAILED}: the hash of the rebuilt
 *     actual row group statistics equals the expected hash.</li>
 * </ol>
 *
 * @param orcDataSourceId data source identifier passed to any raised exception
 * @param stripeOffset offset used to look up the expected row group statistics
 * @param actualRowGroupStatistics row group indexes per stream; must not be null
 * @throws OrcCorruptionException if any of the checks above fails
 */
public void validateRowGroupStatistics(OrcDataSourceId orcDataSourceId, long stripeOffset, Map<StreamId, List<RowGroupIndex>> actualRowGroupStatistics) throws OrcCorruptionException {
    requireNonNull(actualRowGroupStatistics, "actualRowGroupStatistics is null");
    List<RowGroupStatistics> expectedRowGroupStatistics = rowGroupStatistics.get(stripeOffset);
    if (expectedRowGroupStatistics == null) {
        throw new OrcCorruptionException(orcDataSourceId, "Unexpected stripe at offset %s", stripeOffset);
    }

    int rowGroupCount = expectedRowGroupStatistics.size();
    for (Entry<StreamId, List<RowGroupIndex>> entry : actualRowGroupStatistics.entrySet()) {
        if (entry.getValue().size() != rowGroupCount) {
            // Message wording fixed to match the other messages here ("in stripe at offset").
            throw new OrcCorruptionException(orcDataSourceId, "Unexpected row group count in stripe at offset %s", stripeOffset);
        }
    }

    // The set of actual column ids does not depend on the row group, so build it once.
    Set<OrcColumnId> actualColumns = actualRowGroupStatistics.keySet().stream().map(StreamId::getColumnId).collect(Collectors.toSet());

    for (int rowGroupIndex = 0; rowGroupIndex < rowGroupCount; rowGroupIndex++) {
        RowGroupStatistics expectedRowGroup = expectedRowGroupStatistics.get(rowGroupIndex);

        // Any mode other than HASHED compares the statistics column by column.
        if (expectedRowGroup.getValidationMode() != HASHED) {
            Map<OrcColumnId, ColumnStatistics> expectedStatistics = expectedRowGroup.getColumnStatistics();
            if (!expectedStatistics.keySet().equals(actualColumns)) {
                throw new OrcCorruptionException(orcDataSourceId, "Unexpected column in row group %s in stripe at offset %s", rowGroupIndex, stripeOffset);
            }
            for (Entry<StreamId, List<RowGroupIndex>> entry : actualRowGroupStatistics.entrySet()) {
                ColumnStatistics actual = entry.getValue().get(rowGroupIndex).getColumnStatistics();
                ColumnStatistics expected = expectedStatistics.get(entry.getKey().getColumnId());
                validateColumnStatisticsEquivalent(orcDataSourceId, "Row group " + rowGroupIndex + " in stripe at offset " + stripeOffset, actual, expected);
            }
        }

        // Any mode other than DETAILED compares the row group hash.
        if (expectedRowGroup.getValidationMode() != DETAILED) {
            RowGroupStatistics actualRowGroup = buildActualRowGroupStatistics(rowGroupIndex, actualRowGroupStatistics);
            if (expectedRowGroup.getHash() != actualRowGroup.getHash()) {
                throw new OrcCorruptionException(orcDataSourceId, "Checksum mismatch for row group %s in stripe at offset %s", rowGroupIndex, stripeOffset);
            }
        }
    }
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) OrcColumnId(io.trino.orc.metadata.OrcColumnId) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList)

Aggregations

OrcColumnId (io.trino.orc.metadata.OrcColumnId): 24, ImmutableMap (com.google.common.collect.ImmutableMap): 10, Stream (io.trino.orc.metadata.Stream): 9, ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics): 9, ArrayList (java.util.ArrayList): 9, OrcType (io.trino.orc.metadata.OrcType): 8, List (java.util.List): 8, ImmutableList (com.google.common.collect.ImmutableList): 7, Slice (io.airlift.slice.Slice): 5, CompressionKind (io.trino.orc.metadata.CompressionKind): 5, Map (java.util.Map): 5, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 4, Footer (io.trino.orc.metadata.Footer): 4, OrcInputStream (io.trino.orc.stream.OrcInputStream): 4, Page (io.trino.spi.Page): 4, IOException (java.io.IOException): 4, InputStream (java.io.InputStream): 4, ByteBuffer (java.nio.ByteBuffer): 3, Configuration (org.apache.hadoop.conf.Configuration): 3, Path (org.apache.hadoop.fs.Path): 3