Use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.
From the class RcFileTester, method testMapRoundTrip.
private void testMapRoundTrip(Type type, Iterable<?> writeValues, Set<Format> skipFormats)
        throws Exception
{
    // JSON does not support null keys, so we just write the first value
    Object nullKeyWrite = Iterables.getFirst(writeValues, null);
    // values in simple map and mix in some null values
    testRoundTripType(
            createMapType(type),
            transform(insertNullEvery(5, writeValues), value -> toHiveMap(nullKeyWrite, value)),
            skipFormats);
}
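For context, insertNullEvery produces the writeValues stream with a null mixed in after every n elements. The helper lives elsewhere in RcFileTester; the body below is a minimal sketch of that behavior, assuming Guava's com.google.common.collect.AbstractIterator and java.util.Iterator, not the project's actual implementation:

// Hypothetical sketch of insertNullEvery: yield a null after every n delegate values.
private static <T> Iterable<T> insertNullEvery(int n, Iterable<T> iterable)
{
    return () -> new AbstractIterator<T>()
    {
        private final Iterator<T> delegate = iterable.iterator();
        private int position;

        @Override
        protected T computeNext()
        {
            position++;
            if (position > n) {
                position = 0;
                return null; // the injected null element
            }
            if (!delegate.hasNext()) {
                return endOfData();
            }
            return delegate.next();
        }
    };
}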
Use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.
From the class QueryRewriter, method applyPropertyOverride.
private static List<Property> applyPropertyOverride(List<Property> properties, List<Property> overrides)
{
    Map<String, Expression> propertyMap = properties.stream()
            .collect(toImmutableMap(property -> property.getName().getValue().toLowerCase(ENGLISH), Property::getValue));
    Map<String, Expression> overrideMap = overrides.stream()
            .collect(toImmutableMap(property -> property.getName().getValue().toLowerCase(ENGLISH), Property::getValue));
    return Stream.concat(propertyMap.entrySet().stream(), overrideMap.entrySet().stream())
            .collect(Collectors.toMap(Entry::getKey, Entry::getValue, (original, override) -> override))
            .entrySet().stream()
            .map(entry -> new Property(new Identifier(entry.getKey()), entry.getValue()))
            .collect(toImmutableList());
}
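Because Collectors.toMap is given the merge function (original, override) -> override, an override silently replaces a base property with the same case-insensitive name, while untouched properties pass through. A hedged usage sketch; the property names are made up for illustration, and the AST classes (Property, Identifier, StringLiteral, LongLiteral) come from com.facebook.presto.sql.tree:

// Hypothetical example: bucket_count is overridden, format is preserved.
List<Property> properties = ImmutableList.of(
        new Property(new Identifier("format"), new StringLiteral("ORC")),
        new Property(new Identifier("bucket_count"), new LongLiteral("8")));
List<Property> overrides = ImmutableList.of(
        new Property(new Identifier("bucket_count"), new LongLiteral("16")));

List<Property> merged = applyPropertyOverride(properties, overrides);
// merged contains format = 'ORC' and bucket_count = 16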
Use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.
From the class ParquetPageSourceFactory, method checkSchemaMatch.
public static boolean checkSchemaMatch(org.apache.parquet.schema.Type parquetType, Type type)
{
    String prestoType = type.getTypeSignature().getBase();
    if (parquetType instanceof GroupType) {
        GroupType groupType = parquetType.asGroupType();
        switch (prestoType) {
            case ROW:
                RowType rowType = (RowType) type;
                Map<String, Type> prestoFieldMap = rowType.getFields().stream()
                        .collect(Collectors.toMap(
                                field -> field.getName().get().toLowerCase(Locale.ENGLISH),
                                field -> field.getType()));
                for (int i = 0; i < groupType.getFields().size(); i++) {
                    org.apache.parquet.schema.Type parquetFieldType = groupType.getFields().get(i);
                    String fieldName = parquetFieldType.getName().toLowerCase(Locale.ENGLISH);
                    Type prestoFieldType = prestoFieldMap.get(fieldName);
                    if (prestoFieldType != null && !checkSchemaMatch(parquetFieldType, prestoFieldType)) {
                        return false;
                    }
                }
                return true;
            case MAP:
                if (groupType.getFields().size() != 1) {
                    return false;
                }
                org.apache.parquet.schema.Type mapKeyType = groupType.getFields().get(0);
                if (mapKeyType instanceof GroupType) {
                    GroupType mapGroupType = mapKeyType.asGroupType();
                    return mapGroupType.getFields().size() == 2
                            && checkSchemaMatch(mapGroupType.getFields().get(0), type.getTypeParameters().get(0))
                            && checkSchemaMatch(mapGroupType.getFields().get(1), type.getTypeParameters().get(1));
                }
                return false;
            case ARRAY:
                /* An array has the standard 3-level structure, with the middle level a repeated group holding a single field:
                 *   optional group my_list (LIST) {
                 *       repeated group element {
                 *           required type field;
                 *       };
                 *   }
                 * Backward-compatibility support exists for 2-level arrays:
                 *   optional group my_list (LIST) {
                 *       repeated type field;
                 *   }
                 * The field itself can be primitive or a group.
                 */
                if (groupType.getFields().size() != 1) {
                    return false;
                }
                org.apache.parquet.schema.Type bagType = groupType.getFields().get(0);
                if (bagType.isPrimitive()) {
                    return checkSchemaMatch(bagType.asPrimitiveType(), type.getTypeParameters().get(0));
                }
                GroupType bagGroupType = bagType.asGroupType();
                return checkSchemaMatch(bagGroupType, type.getTypeParameters().get(0))
                        || (bagGroupType.getFields().size() == 1 && checkSchemaMatch(bagGroupType.getFields().get(0), type.getTypeParameters().get(0)));
            default:
                return false;
        }
    }
    checkArgument(parquetType.isPrimitive(), "Unexpected parquet type for column: %s", parquetType.getName());
    PrimitiveTypeName parquetTypeName = parquetType.asPrimitiveType().getPrimitiveTypeName();
    switch (parquetTypeName) {
        case INT64:
            return prestoType.equals(BIGINT) || prestoType.equals(DECIMAL) || prestoType.equals(TIMESTAMP);
        case INT32:
            return prestoType.equals(INTEGER) || prestoType.equals(BIGINT) || prestoType.equals(SMALLINT) || prestoType.equals(DATE) || prestoType.equals(DECIMAL) || prestoType.equals(TINYINT);
        case BOOLEAN:
            return prestoType.equals(StandardTypes.BOOLEAN);
        case FLOAT:
            return prestoType.equals(REAL);
        case DOUBLE:
            return prestoType.equals(StandardTypes.DOUBLE);
        case BINARY:
            return prestoType.equals(VARBINARY) || prestoType.equals(VARCHAR) || prestoType.startsWith(CHAR) || prestoType.equals(DECIMAL);
        case INT96:
            return prestoType.equals(TIMESTAMP);
        case FIXED_LEN_BYTE_ARRAY:
            return prestoType.equals(DECIMAL);
        default:
            throw new IllegalArgumentException("Unexpected parquet type name: " + parquetTypeName);
    }
}
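To make the MAP branch concrete: it accepts the standard Parquet map layout, an outer group containing a single repeated key_value group with exactly two fields, and recurses into the key and value types. A minimal sketch, where Repetition is org.apache.parquet.schema.Type.Repetition and mapOfVarcharToBigint stands in for a Presto Type for map(varchar, bigint) obtained from a TypeManager (the variable name is hypothetical):

// Standard Parquet map layout:
// optional group my_map (MAP) {
//     repeated group key_value {
//         required binary key;
//         optional int64 value;
//     }
// }
PrimitiveType key = new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, "key");
PrimitiveType value = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "value");
GroupType keyValue = new GroupType(Repetition.REPEATED, "key_value", key, value);
GroupType myMap = new GroupType(Repetition.OPTIONAL, "my_map", OriginalType.MAP, keyValue);

// true: BINARY matches varchar for the key, INT64 matches bigint for the value
boolean matches = checkSchemaMatch(myMap, mapOfVarcharToBigint);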
Use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.
From the class ParquetPageSourceFactory, method createParquetPageSource.
public static ConnectorPageSource createParquetPageSource(
        HdfsEnvironment hdfsEnvironment,
        String user,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long fileSize,
        List<HiveColumnHandle> columns,
        SchemaTableName tableName,
        boolean useParquetColumnNames,
        DataSize maxReadBlockSize,
        boolean batchReaderEnabled,
        boolean verificationEnabled,
        TypeManager typeManager,
        StandardFunctionResolution functionResolution,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        FileFormatDataSourceStats stats,
        HiveFileContext hiveFileContext,
        ParquetMetadataSource parquetMetadataSource,
        boolean columnIndexFilterEnabled)
{
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(user, path, configuration).openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, stats);
        ParquetMetadata parquetMetadata = parquetMetadataSource.getParquetMetadata(dataSource, fileSize, hiveFileContext.isCacheable()).getParquetMetadata();
        if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedParquetPageSource(columns, parquetMetadata, typeManager, functionResolution);
        }
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        Optional<MessageType> message = columns.stream()
                .filter(column -> column.getColumnType() == REGULAR || isPushedDownSubfield(column))
                .map(column -> getColumnType(typeManager.getType(column.getTypeSignature()), fileSchema, useParquetColumnNames, column, tableName, path))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .map(type -> new MessageType(fileSchema.getName(), type))
                .reduce(MessageType::union);
        MessageType requestedSchema = message.orElse(new MessageType(fileSchema.getName(), ImmutableList.of()));
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : footerBlocks.build()) {
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
                hiveFileContext.incrementCounter("parquet.blocksRead", 1);
                hiveFileContext.incrementCounter("parquet.rowsRead", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesRead", block.getTotalByteSize());
            }
            else {
                hiveFileContext.incrementCounter("parquet.blocksSkipped", 1);
                hiveFileContext.incrementCounter("parquet.rowsSkipped", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesSkipped", block.getTotalByteSize());
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks.build(), dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        for (HiveColumnHandle column : columns) {
            checkArgument(column.getColumnType() == REGULAR || column.getColumnType() == SYNTHESIZED, "column type must be regular or synthesized column");
            String name = column.getName();
            Type type = typeManager.getType(column.getTypeSignature());
            namesBuilder.add(name);
            typesBuilder.add(type);
            if (column.getColumnType() == SYNTHESIZED) {
                Subfield pushedDownSubfield = getPushedDownSubfield(column);
                List<String> nestedColumnPath = nestedColumnPath(pushedDownSubfield);
                Optional<ColumnIO> columnIO = findNestedColumnIO(lookupColumnByName(messageColumnIO, pushedDownSubfield.getRootName()), nestedColumnPath);
                if (columnIO.isPresent()) {
                    fieldsBuilder.add(constructField(type, columnIO.get()));
                }
                else {
                    fieldsBuilder.add(Optional.empty());
                }
            }
            else if (getParquetType(type, fileSchema, useParquetColumnNames, column, tableName, path).isPresent()) {
                String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName();
                fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));
            }
            else {
                fieldsBuilder.add(Optional.empty());
            }
        }
        return new ParquetPageSource(parquetReader, typesBuilder.build(), fieldsBuilder.build(), namesBuilder.build(), hiveFileContext.getStats());
    }
    catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        if (e instanceof AccessControlException) {
            throw new PrestoException(PERMISSION_DENIED, e.getMessage(), e);
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
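One detail worth isolating from the loop over parquetMetadata.getBlocks(): a row group is assigned to a split by the offset of its first data page, so every row group is read by exactly one split even when split byte ranges tile the file. Restated as a standalone predicate (the method name is illustrative, not part of the factory):

// Sketch of the row-group selection rule used above.
static boolean blockBelongsToSplit(BlockMetaData block, long splitStart, long splitLength)
{
    long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
    return firstDataPage >= splitStart && firstDataPage < splitStart + splitLength;
}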
Use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.
From the class RcFileTester, method preprocessWriteValueOld.
private static Object preprocessWriteValueOld(Type type, Object value)
{
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    else if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    else if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    else if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    else if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    else if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    else if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    else if (type instanceof VarcharType) {
        return value;
    }
    else if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    else if (type.equals(DATE)) {
        int days = ((SqlDate) value).getDays();
        LocalDate localDate = LocalDate.ofEpochDay(days);
        ZonedDateTime zonedDateTime = localDate.atStartOfDay(ZoneId.systemDefault());
        long millis = zonedDateTime.toEpochSecond() * 1000;
        Date date = new Date(0);
        // millis must be set separately to avoid masking
        date.setTime(millis);
        return date;
    }
    else if (type.equals(TIMESTAMP)) {
        long millisUtc = ((SqlTimestamp) value).getMillisUtc();
        return new Timestamp(millisUtc);
    }
    else if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    else if (type.getTypeSignature().getBase().equals(ARRAY)) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream()
                .map(element -> preprocessWriteValueOld(elementType, element))
                .collect(toList());
    }
    else if (type.getTypeSignature().getBase().equals(MAP)) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueOld(keyType, entry.getKey()), preprocessWriteValueOld(valueType, entry.getValue()));
        }
        return newMap;
    }
    else if (type.getTypeSignature().getBase().equals(ROW)) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueOld(fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
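As a hedged usage sketch, here is how a map value flows through the conversion; mapOfVarcharToInteger stands in for a Presto Type for map(varchar, integer) obtained from a TypeManager (the variable name is hypothetical):

// Keys of varchar type pass through unchanged, values are narrowed
// via Number.intValue(), and nulls are preserved at any level.
Map<Object, Object> prestoValue = new HashMap<>();
prestoValue.put("a", 1L);
prestoValue.put("b", null);

Object hiveValue = preprocessWriteValueOld(mapOfVarcharToInteger, prestoValue);
// hiveValue is a HashMap: {"a" -> Integer 1, "b" -> null}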