Use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.
From the class TestPruneIndexSourceColumns, method buildProjectedIndexSource.
private static PlanNode buildProjectedIndexSource(PlanBuilder p, Predicate<Symbol> projectionFilter) {
Symbol orderkey = p.symbol("orderkey", INTEGER);
Symbol custkey = p.symbol("custkey", INTEGER);
Symbol totalprice = p.symbol("totalprice", DOUBLE);
ColumnHandle orderkeyHandle = new TpchColumnHandle(orderkey.getName(), INTEGER);
ColumnHandle custkeyHandle = new TpchColumnHandle(custkey.getName(), INTEGER);
ColumnHandle totalpriceHandle = new TpchColumnHandle(totalprice.getName(), DOUBLE);
return p.project(
        Assignments.copyOf(ImmutableList.of(orderkey, custkey, totalprice).stream()
                .filter(projectionFilter)
                .collect(Collectors.toMap(v -> v, v -> p.variable(v.getName())))),
        p.indexSource(
                new TableHandle(new CatalogName("local"), new TpchTableHandle("orders", TINY_SCALE_FACTOR), TpchTransactionHandle.INSTANCE, Optional.empty()),
                ImmutableSet.of(orderkey, custkey),
                ImmutableList.of(orderkey, custkey, totalprice),
                ImmutableMap.of(orderkey, orderkeyHandle, custkey, custkeyHandle, totalprice, totalpriceHandle),
                TupleDomain.fromFixedValues(ImmutableMap.of(totalpriceHandle, asNull(DOUBLE)))));
}
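In a BaseRuleTest-style harness, a helper like this is normally driven through a RuleTester assertion. Below is a minimal sketch of such a caller, assuming the usual rule-test conventions of the planner test framework; the test method name and the harness calls are assumptions, not taken from the snippet above. When the projection filter keeps every output, the pruning rule has nothing to remove and should not fire.
@Test
public void testAllOutputsReferenced() {
    // hypothetical caller: keep all three symbols, so PruneIndexSourceColumns should not fire
    tester().assertThat(new PruneIndexSourceColumns())
            .on(p -> buildProjectedIndexSource(p, symbol -> true))
            .doesNotFire();
}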
Use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.
From the class TestThriftIndexPageSource, method testGetNextPageTwoConcurrentRequests.
@Test
public void testGetNextPageTwoConcurrentRequests() throws Exception {
final int splits = 3;
final int lookupRequestsConcurrency = 2;
final int rowsPerSplit = 1;
List<SettableFuture<PrestoThriftPageResult>> futures = IntStream.range(0, splits).mapToObj(i -> SettableFuture.<PrestoThriftPageResult>create()).collect(toImmutableList());
List<CountDownLatch> signals = IntStream.range(0, splits).mapToObj(i -> new CountDownLatch(1)).collect(toImmutableList());
TestingThriftService client = new TestingThriftService(rowsPerSplit, false, false) {
@Override
public ListenableFuture<PrestoThriftPageResult> getRows(PrestoThriftId splitId, List<String> columns, long maxBytes, PrestoThriftNullableToken nextToken) {
int key = Ints.fromByteArray(splitId.getId());
signals.get(key).countDown();
return futures.get(key);
}
};
ThriftConnectorStats stats = new ThriftConnectorStats();
long pageSizeReceived = 0;
ThriftIndexPageSource pageSource = new ThriftIndexPageSource(
        (context, headers) -> client,
        ImmutableMap.of(),
        stats,
        new ThriftIndexHandle(new SchemaTableName("default", "table1"), TupleDomain.all()),
        ImmutableList.of(column("a", INTEGER)),
        ImmutableList.of(column("b", INTEGER)),
        new InMemoryRecordSet(ImmutableList.of(INTEGER), generateKeys(0, splits)),
        MAX_BYTES_PER_RESPONSE,
        lookupRequestsConcurrency);
assertNull(pageSource.getNextPage());
assertEquals((long) stats.getIndexPageSize().getAllTime().getTotal(), 0);
signals.get(0).await(1, SECONDS);
signals.get(1).await(1, SECONDS);
signals.get(2).await(1, SECONDS);
assertEquals(signals.get(0).getCount(), 0, "first request wasn't sent");
assertEquals(signals.get(1).getCount(), 0, "second request wasn't sent");
assertEquals(signals.get(2).getCount(), 1, "third request shouldn't be sent");
// at this point first two requests were sent
assertFalse(pageSource.isFinished());
assertNull(pageSource.getNextPage());
assertEquals((long) stats.getIndexPageSize().getAllTime().getTotal(), 0);
// completing the second request
futures.get(1).set(pageResult(20, null));
Page page = pageSource.getNextPage();
pageSizeReceived += page.getSizeInBytes();
assertEquals((long) stats.getIndexPageSize().getAllTime().getTotal(), pageSizeReceived);
assertNotNull(page);
assertEquals(page.getPositionCount(), 1);
assertEquals(page.getBlock(0).getInt(0, 0), 20);
// not complete yet
assertFalse(pageSource.isFinished());
// once one of the requests completes the next one should be sent
signals.get(2).await(1, SECONDS);
assertEquals(signals.get(2).getCount(), 0, "third request wasn't sent");
// completing the first request
futures.get(0).set(pageResult(10, null));
page = pageSource.getNextPage();
assertNotNull(page);
pageSizeReceived += page.getSizeInBytes();
assertEquals((long) stats.getIndexPageSize().getAllTime().getTotal(), pageSizeReceived);
assertEquals(page.getPositionCount(), 1);
assertEquals(page.getBlock(0).getInt(0, 0), 10);
// still not complete
assertFalse(pageSource.isFinished());
// completing the third request
futures.get(2).set(pageResult(30, null));
page = pageSource.getNextPage();
assertNotNull(page);
pageSizeReceived += page.getSizeInBytes();
assertEquals((long) stats.getIndexPageSize().getAllTime().getTotal(), pageSizeReceived);
assertEquals(page.getPositionCount(), 1);
assertEquals(page.getBlock(0).getInt(0, 0), 30);
// finished now
assertTrue(pageSource.isFinished());
// after completion
assertNull(pageSource.getNextPage());
pageSource.close();
}
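The getRows override above recovers the split index with Ints.fromByteArray(splitId.getId()); the keys fed to InMemoryRecordSet come from the generateKeys helper, which is not shown here. A plausible counterpart of that encoding, for orientation only; the PrestoThriftId construction is an assumption, not taken from the test class.
// hypothetical: a split id that the getRows override above would decode back to `key`
int key = 2;
PrestoThriftId splitId = new PrestoThriftId(Ints.toByteArray(key));
assertEquals(Ints.fromByteArray(splitId.getId()), key);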
Use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.
From the class RcFileTester, method preprocessWriteValueOld.
private static Object preprocessWriteValueOld(Format format, Type type, Object value) {
if (value == null) {
return null;
}
if (type.equals(BOOLEAN)) {
return value;
}
if (type.equals(TINYINT)) {
return ((Number) value).byteValue();
}
if (type.equals(SMALLINT)) {
return ((Number) value).shortValue();
}
if (type.equals(INTEGER)) {
return ((Number) value).intValue();
}
if (type.equals(BIGINT)) {
return ((Number) value).longValue();
}
if (type.equals(REAL)) {
return ((Number) value).floatValue();
}
if (type.equals(DOUBLE)) {
return ((Number) value).doubleValue();
}
if (type instanceof VarcharType) {
return value;
}
if (type.equals(VARBINARY)) {
return ((SqlVarbinary) value).getBytes();
}
if (type.equals(DATE)) {
return Date.ofEpochDay(((SqlDate) value).getDays());
}
if (type.equals(TIMESTAMP)) {
long millis = ((SqlTimestamp) value).getMillis();
if (format == Format.BINARY) {
millis = HIVE_STORAGE_TIME_ZONE.convertLocalToUTC(millis, false);
}
return Timestamp.ofEpochMilli(millis);
}
if (type instanceof DecimalType) {
return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
}
if (type.getTypeSignature().getBase().equals(ARRAY)) {
Type elementType = type.getTypeParameters().get(0);
return ((List<?>) value).stream().map(element -> preprocessWriteValueOld(format, elementType, element)).collect(toList());
}
if (type.getTypeSignature().getBase().equals(MAP)) {
Type keyType = type.getTypeParameters().get(0);
Type valueType = type.getTypeParameters().get(1);
Map<Object, Object> newMap = new HashMap<>();
for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
newMap.put(preprocessWriteValueOld(format, keyType, entry.getKey()), preprocessWriteValueOld(format, valueType, entry.getValue()));
}
return newMap;
}
if (type.getTypeSignature().getBase().equals(ROW)) {
List<?> fieldValues = (List<?>) value;
List<Type> fieldTypes = type.getTypeParameters();
List<Object> newStruct = new ArrayList<>();
for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
newStruct.add(preprocessWriteValueOld(format, fieldTypes.get(fieldId), fieldValues.get(fieldId)));
}
return newStruct;
}
throw new IllegalArgumentException("unsupported type: " + type);
}
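A minimal sketch of how the recursion behaves for a nested type, assuming the surrounding RcFileTester context; the ArrayType value and the call below are illustrative, not part of the class. The INTEGER branch narrows the engine's Long representation via Number.intValue(), and the ARRAY branch applies the same conversion element by element.
// illustrative only: engine-side longs become java.lang.Integer values for the Hive writer
Type arrayOfInt = new ArrayType(INTEGER);
Object converted = preprocessWriteValueOld(Format.BINARY, arrayOfInt, ImmutableList.of(1L, 2L, 3L));
// converted is a List<Object> containing Integer 1, 2, 3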
Use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.
From the class HiveUtil, method createRecordReader.
public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, Map<String, String> customSplitInfo) {
// determine which hive columns we will read
List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == HiveColumnHandle.ColumnType.REGULAR));
List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));
// Tell Hive which columns we would like to read; this lets Hive optimize reading column-oriented files
setReadColumns(configuration, readHiveColumnIndexes);
// Only propagate serialization schema configs by default
Predicate<String> schemaFilter = schemaProperty -> schemaProperty.startsWith("serialization.");
JobConf jobConf = ConfigurationUtils.toJobConf(configuration);
InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true, jobConf);
FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);
if (!customSplitInfo.isEmpty() && isHudiRealtimeSplit(customSplitInfo)) {
fileSplit = recreateSplitWithCustomInfo(fileSplit, customSplitInfo);
// Add additional column information for record reader
List<String> readHiveColumnNames = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getName));
jobConf.set(READ_COLUMN_NAMES_CONF_STR, Joiner.on(',').join(readHiveColumnNames));
// Remove filter when using customSplitInfo as the record reader requires complete schema configs
schemaFilter = schemaProperty -> true;
}
schema.stringPropertyNames().stream().filter(schemaFilter).forEach(name -> jobConf.set(name, schema.getProperty(name)));
// add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
List<String> codecs = newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
if (!codecs.contains(LzoCodec.class.getName())) {
codecs.add(0, LzoCodec.class.getName());
}
if (!codecs.contains(LzopCodec.class.getName())) {
codecs.add(0, LzopCodec.class.getName());
}
jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));
try {
RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
int headerCount = getHeaderCount(schema);
if (headerCount > 0) {
Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
}
int footerCount = getFooterCount(schema);
if (footerCount > 0) {
recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
}
return recordReader;
} catch (IOException e) {
if (e instanceof TextLineLengthLimitExceededException) {
throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, "Line too long in text file: " + path, e);
}
throw new PrestoException(HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())), e);
}
}
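A minimal sketch of a call site, under assumed values: the schema properties below are standard Hive serde keys ("file.inputformat", "serialization.lib"), while the path, length, and column list are placeholders, not taken from the method above.
Properties schema = new Properties();
schema.setProperty("file.inputformat", "org.apache.hadoop.mapred.TextInputFormat");
schema.setProperty("serialization.lib", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
List<HiveColumnHandle> columns = ImmutableList.of(); // placeholder: real callers pass the projected handles
RecordReader<?, ?> reader = HiveUtil.createRecordReader(
        new Configuration(false),                 // hypothetical Hadoop configuration
        new Path("/warehouse/orders/part-00000"), // hypothetical split file
        0,                                        // split start offset
        1024,                                     // split length in bytes (illustrative)
        schema,
        columns,
        ImmutableMap.of());                       // empty custom split info -> the non-Hudi code path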
Use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.
From the class OrcFileWriterFactory, method createFileWriter.
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<AcidOutputFormat.Options> acidOptions, Optional<HiveACIDWriteType> acidWriteType) {
if (!OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
CompressionKind compression = getCompression(schema, configuration);
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
List<Type> dataFileColumnTypes = fileColumnTypes;
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
Optional<HiveFileWriter> deleteDeltaWriter = Optional.empty();
if (AcidUtils.isTablePropertyTransactional(schema) && !AcidUtils.isInsertOnlyTable(schema)) {
ImmutableList<String> orcFileColumnNames = ImmutableList.of(
        OrcPageSourceFactory.ACID_COLUMN_OPERATION,
        OrcPageSourceFactory.ACID_COLUMN_ORIGINAL_TRANSACTION,
        OrcPageSourceFactory.ACID_COLUMN_BUCKET,
        OrcPageSourceFactory.ACID_COLUMN_ROW_ID,
        OrcPageSourceFactory.ACID_COLUMN_CURRENT_TRANSACTION,
        OrcPageSourceFactory.ACID_COLUMN_ROW_STRUCT);
ImmutableList.Builder<RowType.Field> fieldsBuilder = ImmutableList.builder();
for (int i = 0; i < fileColumnNames.size(); i++) {
fieldsBuilder.add(new RowType.Field(Optional.of(fileColumnNames.get(i)), fileColumnTypes.get(i)));
}
ImmutableList<Type> orcFileColumnTypes = ImmutableList.of(INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, RowType.from(fieldsBuilder.build()));
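// These six names and types mirror the standard Hive ACID ORC layout:
// struct<operation:int, originalTransaction:bigint, bucket:int, rowId:bigint, currentTransaction:bigint, row:struct<data columns>>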
fileColumnNames = orcFileColumnNames;
fileColumnTypes = orcFileColumnTypes;
if (acidWriteType.isPresent() && acidWriteType.get() == HiveACIDWriteType.UPDATE) {
AcidOutputFormat.Options deleteOptions = acidOptions.get().clone().writingDeleteDelta(true);
Path deleteDeltaPath = AcidUtils.createFilename(path.getParent().getParent(), deleteOptions);
deleteDeltaWriter = createFileWriter(deleteDeltaPath, inputColumnNames, storageFormat, schema, configuration, session, Optional.of(deleteOptions), Optional.of(HiveACIDWriteType.DELETE));
}
}
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
OrcDataSink orcDataSink = createOrcDataSink(session, fileSystem, path);
Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
FileStatus fileStatus = fileSystem.getFileStatus(path);
return new HdfsOrcDataSource(
        new OrcDataSourceId(path.toString()),
        fileStatus.getLen(),
        HiveSessionProperties.getOrcMaxMergeDistance(session),
        HiveSessionProperties.getOrcMaxBufferSize(session),
        HiveSessionProperties.getOrcStreamBufferSize(session),
        false,
        fileSystem.open(path),
        readStats,
        fileStatus.getModificationTime());
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
return Optional.of(new OrcFileWriter(
        orcDataSink,
        rollbackAction,
        fileColumnNames,
        fileColumnTypes,
        dataFileColumnTypes,
        compression,
        orcWriterOptions
                .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session))
                .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session))
                .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session))
                .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session))
                .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session)),
        writeLegacyVersion,
        fileInputColumnIndexes,
        ImmutableMap.<String, String>builder()
                .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId())
                .put("hive.acid.version", String.valueOf(AcidUtils.OrcAcidVersion.ORC_ACID_VERSION))
                .build(),
        validationInputFactory,
        HiveSessionProperties.getOrcOptimizedWriterValidateMode(session),
        stats,
        acidOptions,
        acidWriteType,
        deleteDeltaWriter,
        path));
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
}
}