use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.
the class MapColumnWriter method finishRowGroup.
/**
 * Finishes the current row group for this map column and its key and value writers.
 *
 * <p>A {@link ColumnStatistics} carrying the running non-null value count (all other
 * constructor arguments are 0 or null) is appended to {@code rowGroupColumnStatistics},
 * and the running count is reset to zero before the nested writers are finished.
 *
 * @return a map holding this column's statistics under {@code columnId}, plus every entry
 *         returned by the key writer and the value writer
 * @throws IllegalStateException if the writer is already closed
 */
@Override
public Map<OrcColumnId, ColumnStatistics> finishRowGroup() {
    checkState(!closed);

    ColumnStatistics mapStatistics = new ColumnStatistics((long) nonNullValueCount, 0, null, null, null, null, null, null, null, null, null);
    rowGroupColumnStatistics.add(mapStatistics);
    // Start counting afresh for the next row group.
    nonNullValueCount = 0;

    // buildOrThrow rejects duplicate column ids across the map, key and value entries.
    return ImmutableMap.<OrcColumnId, ColumnStatistics>builder()
            .put(columnId, mapStatistics)
            .putAll(keyWriter.finishRowGroup())
            .putAll(valueWriter.finishRowGroup())
            .buildOrThrow();
}
use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.
the class TypeConverter method toOrcListType.
/**
 * Converts an Iceberg list type into a flat list of ORC types: the LIST node first,
 * followed by everything {@code toOrcType} produces for the element type.
 *
 * @param nextFieldTypeIndex index preceding the element's type index; the element is
 *                           converted with {@code nextFieldTypeIndex + 1}
 * @param listType the Iceberg list type to convert
 * @param attributes attributes attached to the LIST node itself
 * @return a mutable list with the LIST node at position 0 and the element types after it
 */
private static List<OrcType> toOrcListType(int nextFieldTypeIndex, Types.ListType listType, Map<String, String> attributes) {
    int elementTypeIndex = nextFieldTypeIndex + 1;

    // The element carries its Iceberg field id and required-ness as ORC type attributes.
    ImmutableMap.Builder<String, String> elementAttributes = ImmutableMap.builder();
    elementAttributes.put(ORC_ICEBERG_ID_KEY, Integer.toString(listType.elementId()));
    elementAttributes.put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(listType.isElementRequired()));
    List<OrcType> elementTypes = toOrcType(elementTypeIndex, listType.elementType(), elementAttributes.buildOrThrow());

    OrcType listNode = new OrcType(
            OrcTypeKind.LIST,
            ImmutableList.of(new OrcColumnId(elementTypeIndex)),
            ImmutableList.of("item"),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            attributes);

    List<OrcType> result = new ArrayList<>();
    result.add(listNode);
    result.addAll(elementTypes);
    return result;
}
use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.
the class TypeConverter method toOrcStructType.
/**
 * Converts an Iceberg struct type into a flat list of ORC types: the STRUCT node first,
 * followed by the converted types of each field in declaration order.
 *
 * <p>Each field is converted starting at a running index that begins at
 * {@code nextFieldTypeIndex + 1} and advances by the number of ORC types the
 * previous field produced.
 *
 * @param nextFieldTypeIndex index preceding the first field's type index
 * @param structType the Iceberg struct type to convert
 * @param attributes attributes attached to the STRUCT node itself
 * @return an immutable list with the STRUCT node at position 0 and all field types after it
 */
private static List<OrcType> toOrcStructType(int nextFieldTypeIndex, Types.StructType structType, Map<String, String> attributes) {
    int childTypeIndex = nextFieldTypeIndex + 1;
    List<OrcColumnId> childColumnIds = new ArrayList<>();
    List<String> childNames = new ArrayList<>();
    List<OrcType> flattenedChildTypes = new ArrayList<>();

    for (Types.NestedField child : structType.fields()) {
        childColumnIds.add(new OrcColumnId(childTypeIndex));
        childNames.add(child.name());

        // Each field carries its Iceberg field id and required-ness as ORC type attributes.
        ImmutableMap.Builder<String, String> childAttributes = ImmutableMap.builder();
        childAttributes.put(ORC_ICEBERG_ID_KEY, Integer.toString(child.fieldId()));
        childAttributes.put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(child.isRequired()));

        List<OrcType> childTypes = toOrcType(childTypeIndex, child.type(), childAttributes.buildOrThrow());
        flattenedChildTypes.addAll(childTypes);
        // Skip past every type the field expanded into before numbering the next field.
        childTypeIndex += childTypes.size();
    }

    OrcType structNode = new OrcType(
            OrcTypeKind.STRUCT,
            childColumnIds,
            childNames,
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            attributes);
    return ImmutableList.<OrcType>builder()
            .add(structNode)
            .addAll(flattenedChildTypes)
            .build();
}
use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.
the class OrcWriteValidation method validateColumnStatisticsEquivalent.
/**
 * Checks that two sets of per-column statistics have the same number of columns and
 * that the statistics for each column index are equivalent.
 *
 * @param orcDataSourceId data source identifier passed through to any raised exception
 * @param name label for the statistics being compared; used in error messages
 * @param actualColumnStatistics statistics being validated
 * @param expectedColumnStatistics statistics to validate against
 * @throws OrcCorruptionException if the column counts differ, or if the per-column
 *         comparison this method delegates to reports a mismatch
 * @throws NullPointerException if {@code name} or either statistics argument is null
 */
private static void validateColumnStatisticsEquivalent(OrcDataSourceId orcDataSourceId, String name, ColumnMetadata<ColumnStatistics> actualColumnStatistics, ColumnMetadata<ColumnStatistics> expectedColumnStatistics) throws OrcCorruptionException {
    requireNonNull(name, "name is null");
    requireNonNull(actualColumnStatistics, "actualColumnStatistics is null");
    requireNonNull(expectedColumnStatistics, "expectedColumnStatistics is null");

    int columnCount = actualColumnStatistics.size();
    if (columnCount != expectedColumnStatistics.size()) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected number of columns in %s statistics", name);
    }

    // Column ids are looked up by index, from 0 up to the (equal) column count.
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        OrcColumnId id = new OrcColumnId(columnIndex);
        validateColumnStatisticsEquivalent(
                orcDataSourceId,
                name + " column " + columnIndex,
                actualColumnStatistics.get(id),
                expectedColumnStatistics.get(id));
    }
}
use of io.trino.orc.metadata.OrcColumnId in project trino by trinodb.
the class OrcWriteValidation method validateRowGroupStatistics.
/**
 * Validates the row group statistics read back for one stripe against the statistics
 * recorded for that stripe offset in {@code rowGroupStatistics}.
 *
 * <p>For each expected row group, the validation mode decides which checks run:
 * unless the mode is {@code HASHED}, the set of columns and each column's statistics
 * are compared; unless the mode is {@code DETAILED}, the row group hash is compared.
 *
 * @param orcDataSourceId data source identifier passed through to any raised exception
 * @param stripeOffset offset of the stripe, used to look up the expected statistics
 * @param actualRowGroupStatistics row group indexes per stream, as read back
 * @throws OrcCorruptionException if no statistics were recorded for the stripe offset,
 *         a stream has a different row group count than expected, the column sets
 *         differ, column statistics are not equivalent, or a hash does not match
 * @throws NullPointerException if {@code actualRowGroupStatistics} is null
 */
public void validateRowGroupStatistics(OrcDataSourceId orcDataSourceId, long stripeOffset, Map<StreamId, List<RowGroupIndex>> actualRowGroupStatistics) throws OrcCorruptionException {
    requireNonNull(actualRowGroupStatistics, "actualRowGroupStatistics is null");
    List<RowGroupStatistics> expectedRowGroupStatistics = rowGroupStatistics.get(stripeOffset);
    if (expectedRowGroupStatistics == null) {
        throw new OrcCorruptionException(orcDataSourceId, "Unexpected stripe at offset %s", stripeOffset);
    }

    // Every stream must report exactly as many row groups as were recorded.
    int rowGroupCount = expectedRowGroupStatistics.size();
    for (List<RowGroupIndex> rowGroupIndexes : actualRowGroupStatistics.values()) {
        if (rowGroupIndexes.size() != rowGroupCount) {
            throw new OrcCorruptionException(orcDataSourceId, "Unexpected row group count in stripe at offset %s", stripeOffset);
        }
    }

    // The set of columns present does not depend on the row group, so compute it once.
    Set<OrcColumnId> actualColumns = actualRowGroupStatistics.keySet().stream()
            .map(StreamId::getColumnId)
            .collect(Collectors.toSet());

    for (int rowGroupIndex = 0; rowGroupIndex < rowGroupCount; rowGroupIndex++) {
        RowGroupStatistics expectedRowGroup = expectedRowGroupStatistics.get(rowGroupIndex);

        // Column-by-column comparison; skipped only for row groups in HASHED mode.
        if (expectedRowGroup.getValidationMode() != HASHED) {
            Map<OrcColumnId, ColumnStatistics> expectedStatistics = expectedRowGroup.getColumnStatistics();
            if (!expectedStatistics.keySet().equals(actualColumns)) {
                throw new OrcCorruptionException(orcDataSourceId, "Unexpected column in row group %s in stripe at offset %s", rowGroupIndex, stripeOffset);
            }
            for (Entry<StreamId, List<RowGroupIndex>> entry : actualRowGroupStatistics.entrySet()) {
                ColumnStatistics actual = entry.getValue().get(rowGroupIndex).getColumnStatistics();
                ColumnStatistics expected = expectedStatistics.get(entry.getKey().getColumnId());
                validateColumnStatisticsEquivalent(orcDataSourceId, "Row group " + rowGroupIndex + " in stripe at offset " + stripeOffset, actual, expected);
            }
        }

        // Hash comparison; skipped only for row groups in DETAILED mode.
        if (expectedRowGroup.getValidationMode() != DETAILED) {
            RowGroupStatistics actualRowGroup = buildActualRowGroupStatistics(rowGroupIndex, actualRowGroupStatistics);
            if (expectedRowGroup.getHash() != actualRowGroup.getHash()) {
                throw new OrcCorruptionException(orcDataSourceId, "Checksum mismatch for row group %s in stripe at offset %s", rowGroupIndex, stripeOffset);
            }
        }
    }
}
Aggregations