use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.
the class TestSelectiveOrcReader method testArrays.
@Test
public void testArrays() throws Exception {
Random random = new Random(0);
// non-null arrays of varying sizes; some arrays may be empty
tester.testRoundTrip(arrayType(INTEGER), createList(NUM_ROWS, i -> randomIntegers(random.nextInt(10), random)), IS_NULL, IS_NOT_NULL);
BigintRange negative = BigintRange.of(Integer.MIN_VALUE, 0, false);
BigintRange nonNegative = BigintRange.of(0, Integer.MAX_VALUE, false);
// arrays of strings
tester.testRoundTrip(arrayType(VARCHAR), createList(1000, i -> randomStrings(5 + random.nextInt(5), random)), ImmutableList.of(toSubfieldFilter("c[1]", IS_NULL), toSubfieldFilter("c[1]", stringIn(true, "a", "b", "c", "d"))));
tester.testRoundTrip(arrayType(VARCHAR), createList(10, i -> randomStringsWithNulls(5 + random.nextInt(5), random)), ImmutableList.of(toSubfieldFilter("c[1]", IS_NULL), toSubfieldFilter("c[1]", stringIn(true, "a", "b", "c", "d"))));
// non-empty non-null arrays of varying sizes
tester.testRoundTrip(arrayType(INTEGER), createList(NUM_ROWS, i -> randomIntegers(5 + random.nextInt(5), random)), ImmutableList.of(toSubfieldFilter(IS_NULL), toSubfieldFilter(IS_NOT_NULL), // c[1] >= 0
toSubfieldFilter("c[1]", nonNegative), // c[2] >= 0 AND c[4] >= 0
ImmutableMap.of(new Subfield("c[2]"), nonNegative, new Subfield("c[4]"), nonNegative)));
// non-null arrays of varying sizes; some arrays may be empty
tester.testRoundTripTypes(ImmutableList.of(INTEGER, arrayType(INTEGER)), ImmutableList.of(randomIntegers(NUM_ROWS, random), createList(NUM_ROWS, i -> randomIntegers(random.nextInt(10), random))), toSubfieldFilters(ImmutableMap.of(0, nonNegative), ImmutableMap.of(0, nonNegative, 1, IS_NULL), ImmutableMap.of(0, nonNegative, 1, IS_NOT_NULL)));
// non-empty non-null arrays of varying sizes
tester.testRoundTripTypes(ImmutableList.of(INTEGER, arrayType(INTEGER)), ImmutableList.of(randomIntegers(NUM_ROWS, random), createList(NUM_ROWS, i -> randomIntegers(5 + random.nextInt(5), random))), ImmutableList.of(// c[1] >= 0
ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, toSubfieldFilter("c[1]", nonNegative)), // c[3] >= 0
ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, toSubfieldFilter("c[3]", nonNegative)), // c[2] >= 0 AND c[4] <= 0
ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, ImmutableMap.of(new Subfield("c[2]"), nonNegative, new Subfield("c[4]"), negative))));
// nested arrays
tester.testRoundTripTypes(ImmutableList.of(INTEGER, arrayType(arrayType(INTEGER))), ImmutableList.of(randomIntegers(NUM_ROWS, random), createList(NUM_ROWS, i -> createList(random.nextInt(10), index -> randomIntegers(random.nextInt(5), random)))), toSubfieldFilters(ImmutableMap.of(0, nonNegative), ImmutableMap.of(1, IS_NULL), ImmutableMap.of(1, IS_NOT_NULL), ImmutableMap.of(0, nonNegative, 1, IS_NULL)));
tester.testRoundTripTypes(ImmutableList.of(INTEGER, arrayType(arrayType(INTEGER))), ImmutableList.of(randomIntegers(NUM_ROWS, random), createList(NUM_ROWS, i -> createList(3 + random.nextInt(10), index -> randomIntegers(3 + random.nextInt(5), random)))), ImmutableList.of(// c[1] IS NULL
ImmutableMap.of(1, ImmutableMap.of(new Subfield("c[1]"), IS_NULL)), // c[2] IS NOT NULL AND c[2][3] >= 0
ImmutableMap.of(1, ImmutableMap.of(new Subfield("c[2]"), IS_NOT_NULL, new Subfield("c[2][3]"), nonNegative)), ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, ImmutableMap.of(new Subfield("c[1]"), IS_NULL))));
}
use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.
the class TupleDomainParquetPredicate method getDomain.
/**
* Get a domain for the ranges defined by each pair of elements from {@code minimums} and {@code maximums}.
* Both arrays must have the same length.
*/
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue) {
checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
List<Range> ranges = new ArrayList<>();
if (type.equals(BOOLEAN)) {
boolean hasTrueValues = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
boolean hasFalseValues = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
if (hasTrueValues && hasFalseValues) {
return Domain.all(type);
}
if (hasTrueValues) {
return Domain.create(ValueSet.of(type, true), hasNullValue);
}
if (hasFalseValues) {
return Domain.create(ValueSet.of(type, false), hasNullValue);
}
// All nulls case is handled earlier
throw new VerifyException("Impossible boolean statistics");
}
if ((type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER))) {
for (int i = 0; i < minimums.size(); i++) {
long min = asLong(minimums.get(i));
long max = asLong(maximums.get(i));
if (isStatisticsOverflow(type, min, max)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, min, true, max, true));
}
checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type.equals(REAL)) {
for (int i = 0; i < minimums.size(); i++) {
Float min = (Float) minimums.get(i);
Float max = (Float) maximums.get(i);
if (min.isNaN() || max.isNaN()) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
}
checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type.equals(DOUBLE)) {
for (int i = 0; i < minimums.size(); i++) {
Double min = (Double) minimums.get(i);
Double max = (Double) maximums.get(i);
if (min.isNaN() || max.isNaN()) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, min, true, max, true));
}
checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (isVarcharType(type)) {
for (int i = 0; i < minimums.size(); i++) {
Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
ranges.add(Range.range(type, min, true, max, true));
}
checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type.equals(DATE)) {
for (int i = 0; i < minimums.size(); i++) {
long min = asLong(minimums.get(i));
long max = asLong(maximums.get(i));
if (isStatisticsOverflow(type, min, max)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, min, true, max, true));
}
checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
return Domain.create(ValueSet.all(type), hasNullValue);
}
use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.
the class TestHivePageSink method writeTestFile.
private static long writeTestFile(HiveClientConfig config, MetastoreClientConfig metastoreClientConfig, ExtendedHiveMetastore metastore, String outputPath) {
HiveTransactionHandle transaction = new HiveTransactionHandle();
HiveWriterStats stats = new HiveWriterStats();
ConnectorPageSink pageSink = createPageSink(transaction, config, metastoreClientConfig, metastore, new Path("file:///" + outputPath), stats);
List<LineItemColumn> columns = getTestColumns();
List<Type> columnTypes = columns.stream().map(LineItemColumn::getType).map(TestHivePageSink::getHiveType).map(hiveType -> hiveType.getType(FUNCTION_AND_TYPE_MANAGER)).collect(toList());
PageBuilder pageBuilder = new PageBuilder(columnTypes);
int rows = 0;
for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
rows++;
if (rows >= NUM_ROWS) {
break;
}
pageBuilder.declarePosition();
for (int i = 0; i < columns.size(); i++) {
LineItemColumn column = columns.get(i);
BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
switch(column.getType().getBase()) {
case IDENTIFIER:
BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
break;
case INTEGER:
INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
break;
case DATE:
DATE.writeLong(blockBuilder, column.getDate(lineItem));
break;
case DOUBLE:
DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
break;
case VARCHAR:
createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
break;
default:
throw new IllegalArgumentException("Unsupported type " + column.getType());
}
}
}
Page page = pageBuilder.build();
pageSink.appendPage(page);
getFutureValue(pageSink.finish());
File outputDir = new File(outputPath);
List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
File outputFile = getOnlyElement(files);
long length = outputFile.length();
ConnectorPageSource pageSource = createPageSource(transaction, config, metastoreClientConfig, outputFile);
List<Page> pages = new ArrayList<>();
while (!pageSource.isFinished()) {
Page nextPage = pageSource.getNextPage();
if (nextPage != null) {
pages.add(nextPage.getLoadedPage());
}
}
MaterializedResult expectedResults = toMaterializedResult(getSession(config), columnTypes, ImmutableList.of(page));
MaterializedResult results = toMaterializedResult(getSession(config), columnTypes, pages);
assertEquals(results, expectedResults);
assertEquals(stats.getInputPageSizeInBytes().getAllTime().getMax(), page.getRetainedSizeInBytes());
return length;
}
use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.
the class HiveFileFormatBenchmark method createTpchDataSet.
private static <E extends TpchEntity> TestData createTpchDataSet(FileFormat format, TpchTable<E> tpchTable, List<TpchColumn<E>> columns) {
List<String> columnNames = columns.stream().map(TpchColumn::getColumnName).collect(toList());
List<Type> columnTypes = columns.stream().map(HiveFileFormatBenchmark::getColumnType).map(type -> format.supportsDate() || !DATE.equals(type) ? type : createUnboundedVarcharType()).collect(toList());
PageBuilder pageBuilder = new PageBuilder(columnTypes);
ImmutableList.Builder<Page> pages = ImmutableList.builder();
long dataSize = 0;
for (E row : tpchTable.createGenerator(10, 1, 1)) {
pageBuilder.declarePosition();
for (int i = 0; i < columns.size(); i++) {
TpchColumn<E> column = columns.get(i);
BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
switch(column.getType().getBase()) {
case IDENTIFIER:
BIGINT.writeLong(blockBuilder, column.getIdentifier(row));
break;
case INTEGER:
INTEGER.writeLong(blockBuilder, column.getInteger(row));
break;
case DATE:
if (format.supportsDate()) {
DATE.writeLong(blockBuilder, column.getDate(row));
} else {
createUnboundedVarcharType().writeString(blockBuilder, column.getString(row));
}
break;
case DOUBLE:
DOUBLE.writeDouble(blockBuilder, column.getDouble(row));
break;
case VARCHAR:
createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(row)));
break;
default:
throw new IllegalArgumentException("Unsupported type " + column.getType());
}
}
if (pageBuilder.isFull()) {
Page page = pageBuilder.build();
pages.add(page);
pageBuilder.reset();
dataSize += page.getSizeInBytes();
if (dataSize >= MIN_DATA_SIZE) {
break;
}
}
}
return new TestData(columnNames, columnTypes, pages.build());
}
use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.
the class HiveUtil method createRecordReader.
public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, Map<String, String> customSplitInfo) {
// determine which hive columns we will read
List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));
// Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
setReadColumns(configuration, readHiveColumnIndexes);
// Only propagate serialization schema configs by default
Predicate<String> schemaFilter = schemaProperty -> schemaProperty.startsWith("serialization.");
InputFormat<?, ?> inputFormat = getInputFormat(configuration, getInputFormatName(schema), true);
JobConf jobConf = toJobConf(configuration);
FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);
if (!customSplitInfo.isEmpty() && isHudiRealtimeSplit(customSplitInfo)) {
fileSplit = recreateSplitWithCustomInfo(fileSplit, customSplitInfo);
// Add additional column information for record reader
List<String> readHiveColumnNames = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getName));
jobConf.set(READ_COLUMN_NAMES_CONF_STR, Joiner.on(',').join(readHiveColumnNames));
// Remove filter when using customSplitInfo as the record reader requires complete schema configs
schemaFilter = schemaProperty -> true;
}
schema.stringPropertyNames().stream().filter(schemaFilter).forEach(name -> jobConf.set(name, schema.getProperty(name)));
// add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
List<String> codecs = newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
if (!codecs.contains(LzoCodec.class.getName())) {
codecs.add(0, LzoCodec.class.getName());
}
if (!codecs.contains(LzopCodec.class.getName())) {
codecs.add(0, LzopCodec.class.getName());
}
jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));
try {
RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
int headerCount = getHeaderCount(schema);
// Only skip header rows when the split is at the beginning of the file
if (start == 0 && headerCount > 0) {
Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
}
int footerCount = getFooterCount(schema);
if (footerCount > 0) {
recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
}
return recordReader;
} catch (IOException e) {
if (e instanceof TextLineLengthLimitExceededException) {
throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
}
throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())), e);
}
}
Aggregations