Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.
The class TestDecimalStream, method testReadToEdgeOfChunkShort.
@Test
public void testReadToEdgeOfChunkShort() throws IOException {
    OrcChunkLoader loader = new TestingChunkLoader(
            new OrcDataSourceId("read to edge of chunk short"),
            ImmutableList.of(
                    encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE))),
                    encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE)))));
    DecimalInputStream stream = new DecimalInputStream(loader);
    assertEquals(nextShortDecimalValue(stream), Long.MAX_VALUE);
    assertEquals(nextShortDecimalValue(stream), Long.MAX_VALUE);
}
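For context on what these tests exercise: ORC stores a decimal's unscaled value in the DATA stream as an unbounded, zigzag-encoded, little-endian base-128 varint, so each encodeValues chunk above ends exactly at a value boundary. Below is a minimal standalone sketch of decoding one such value into a long; it is illustrative only and is not the project's DecimalInputStream implementation.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

public final class ShortDecimalVarintSketch {
    private ShortDecimalVarintSketch() {}

    // Decodes one unbounded, zigzag-encoded base-128 varint whose value fits in a long.
    static long readShortDecimal(InputStream in) throws IOException {
        long unsigned = 0;
        int shift = 0;
        while (true) {
            int b = in.read();
            if (b < 0) {
                throw new IOException("unexpected end of stream");
            }
            unsigned |= (long) (b & 0x7F) << shift;
            if ((b & 0x80) == 0) {
                break;
            }
            shift += 7;
        }
        // undo zigzag: even values are non-negative, odd values are negative
        return (unsigned >>> 1) ^ -(unsigned & 1);
    }

    public static void main(String[] args) throws IOException {
        // Long.MAX_VALUE zigzags to 0xFFFFFFFFFFFFFFFE, which encodes to ten varint bytes
        byte[] encoded = {(byte) 0xFE, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF,
                (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, 0x01};
        System.out.println(readShortDecimal(new ByteArrayInputStream(encoded)) == Long.MAX_VALUE);
    }
}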
Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.
The class TestDecimalStream, method testSkipToEdgeOfChunkShort.
@Test
public void testSkipToEdgeOfChunkShort() throws IOException {
    OrcChunkLoader loader = new TestingChunkLoader(
            new OrcDataSourceId("skip to edge of chunk short"),
            ImmutableList.of(
                    encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE))),
                    encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE)))));
    DecimalInputStream stream = new DecimalInputStream(loader);
    stream.skip(1);
    assertEquals(nextShortDecimalValue(stream), Long.MAX_VALUE);
}
Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.
The class TestLongDecode, method assertVIntRoundTrip.
private static void assertVIntRoundTrip(SliceOutput output, long value, boolean signed) throws IOException {
    // write using Hive's code
    output.reset();
    if (signed) {
        writeVslong(output, value);
    } else {
        writeVulong(output, value);
    }
    Slice hiveBytes = Slices.copyOf(output.slice());

    // write using Presto's code, and verify they are the same
    output.reset();
    writeVLong(output, value, signed);
    Slice prestoBytes = Slices.copyOf(output.slice());
    if (!prestoBytes.equals(hiveBytes)) {
        assertEquals(prestoBytes, hiveBytes);
    }

    // read using Hive's code
    if (signed) {
        long readValueOld = readVslong(hiveBytes.getInput());
        assertEquals(readValueOld, value);
    } else {
        long readValueOld = readVulong(hiveBytes.getInput());
        assertEquals(readValueOld, value);
    }

    // read using Presto's code
    long readValueNew = readVInt(signed, new OrcInputStream(OrcChunkLoader.create(
            new OrcDataSourceId("test"), hiveBytes, Optional.empty(), newSimpleAggregatedMemoryContext())));
    assertEquals(readValueNew, value);
}
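The round trip works because Hive's writeVulong/writeVslong and the Presto-side writeVLong are expected to emit the same ORC variable-length integer format: seven payload bits per byte, little-endian, with the high bit set while more bytes follow, and signed values zigzag-mapped first. A minimal standalone sketch of the write side follows; it is illustrative and is not the code under test.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public final class VIntEncodingSketch {
    private VIntEncodingSketch() {}

    // Unsigned variant: 7 low bits per byte, continuation bit (0x80) while more bits remain.
    static void writeUnsignedVInt(OutputStream out, long value) throws IOException {
        while ((value & ~0x7FL) != 0) {
            out.write((int) (0x80 | (value & 0x7F)));
            value >>>= 7;
        }
        out.write((int) value);
    }

    // Signed variant: zigzag-map the value so small magnitudes stay short, then encode unsigned.
    static void writeSignedVInt(OutputStream out, long value) throws IOException {
        writeUnsignedVInt(out, (value << 1) ^ (value >> 63));
    }

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        writeSignedVInt(out, -1);       // zigzag(-1) == 1, encodes to the single byte 0x01
        writeUnsignedVInt(out, 128);    // encodes to 0x80 0x01
        System.out.println(out.size()); // 3 bytes total
    }
}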
Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.
The class OrcFileWriterFactory, method createFileWriter.
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat,
        Properties schema, JobConf configuration, ConnectorSession session,
        Optional<AcidOutputFormat.Options> acidOptions, Optional<HiveACIDWriteType> acidWriteType) {
    if (!OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    CompressionKind compression = getCompression(schema, configuration);

    // existing tables and partitions may have columns in a different order than the writer is providing,
    // so build an index to rearrange columns in the proper order
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager))
            .collect(toList());
    List<Type> dataFileColumnTypes = fileColumnTypes;
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    Optional<HiveFileWriter> deleteDeltaWriter = Optional.empty();
    if (AcidUtils.isTablePropertyTransactional(schema) && !AcidUtils.isInsertOnlyTable(schema)) {
        ImmutableList<String> orcFileColumnNames = ImmutableList.of(
                OrcPageSourceFactory.ACID_COLUMN_OPERATION,
                OrcPageSourceFactory.ACID_COLUMN_ORIGINAL_TRANSACTION,
                OrcPageSourceFactory.ACID_COLUMN_BUCKET,
                OrcPageSourceFactory.ACID_COLUMN_ROW_ID,
                OrcPageSourceFactory.ACID_COLUMN_CURRENT_TRANSACTION,
                OrcPageSourceFactory.ACID_COLUMN_ROW_STRUCT);
        ImmutableList.Builder<RowType.Field> fieldsBuilder = ImmutableList.builder();
        for (int i = 0; i < fileColumnNames.size(); i++) {
            fieldsBuilder.add(new RowType.Field(Optional.of(fileColumnNames.get(i)), fileColumnTypes.get(i)));
        }
        ImmutableList<Type> orcFileColumnTypes = ImmutableList.of(
                INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, RowType.from(fieldsBuilder.build()));
        fileColumnNames = orcFileColumnNames;
        fileColumnTypes = orcFileColumnTypes;
        if (acidWriteType.isPresent() && acidWriteType.get() == HiveACIDWriteType.UPDATE) {
            AcidOutputFormat.Options deleteOptions = acidOptions.get().clone().writingDeleteDelta(true);
            Path deleteDeltaPath = AcidUtils.createFilename(path.getParent().getParent(), deleteOptions);
            deleteDeltaWriter = createFileWriter(deleteDeltaPath, inputColumnNames, storageFormat, schema,
                    configuration, session, Optional.of(deleteOptions), Optional.of(HiveACIDWriteType.DELETE));
        }
    }

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OrcDataSink orcDataSink = createOrcDataSink(session, fileSystem, path);
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    FileStatus fileStatus = fileSystem.getFileStatus(path);
                    return new HdfsOrcDataSource(
                            new OrcDataSourceId(path.toString()),
                            fileStatus.getLen(),
                            HiveSessionProperties.getOrcMaxMergeDistance(session),
                            HiveSessionProperties.getOrcMaxBufferSize(session),
                            HiveSessionProperties.getOrcStreamBufferSize(session),
                            false,
                            fileSystem.open(path),
                            readStats,
                            fileStatus.getModificationTime());
                } catch (IOException e) {
                    throw new PrestoException(HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new OrcFileWriter(
                orcDataSink, rollbackAction, fileColumnNames, fileColumnTypes, dataFileColumnTypes, compression,
                orcWriterOptions
                        .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session))
                        .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session))
                        .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session))
                        .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session)),
                writeLegacyVersion, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .put("hive.acid.version", String.valueOf(AcidUtils.OrcAcidVersion.ORC_ACID_VERSION))
                        .build(),
                validationInputFactory, HiveSessionProperties.getOrcOptimizedWriterValidateMode(session), stats,
                acidOptions, acidWriteType, deleteDeltaWriter, path));
    } catch (IOException e) {
        throw new PrestoException(HiveErrorCode.HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
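The createOrcDataSink helper called above is not part of this excerpt. A minimal sketch of what such a helper plausibly looks like, assuming it simply opens the target path on the Hadoop FileSystem and wraps the stream in io.prestosql.orc.OutputStreamOrcDataSink; the signature here is an assumption, not quoted from hetu-core:

// Illustrative sketch: hand the ORC writer a sink backed by the HDFS output stream for the target path.
public static OrcDataSink createOrcDataSink(ConnectorSession session, FileSystem fileSystem, Path path)
        throws IOException {
    return new OutputStreamOrcDataSink(fileSystem.create(path));
}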
Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.
The class TestDecimalStream, method testSkipToEdgeOfChunkLong.
@Test
public void testSkipToEdgeOfChunkLong() throws IOException {
    OrcChunkLoader loader = new TestingChunkLoader(
            new OrcDataSourceId("skip to edge of chunk long"),
            ImmutableList.of(
                    encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE))),
                    encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE)))));
    DecimalInputStream stream = new DecimalInputStream(loader);
    stream.skip(1);
    assertEquals(nextLongDecimalValue(stream), BigInteger.valueOf(Long.MAX_VALUE));
}
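For completeness, a read-to-edge counterpart for long decimals would mirror testReadToEdgeOfChunkShort above. The following sketch is illustrative, built only from the helpers already used in these tests (TestingChunkLoader, encodeValues, nextLongDecimalValue), and is not quoted from the project:

@Test
public void testReadToEdgeOfChunkLong() throws IOException {
    // two chunks, each ending exactly at a value boundary, read back without an explicit skip
    OrcChunkLoader loader = new TestingChunkLoader(
            new OrcDataSourceId("read to edge of chunk long"),
            ImmutableList.of(
                    encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE))),
                    encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE)))));
    DecimalInputStream stream = new DecimalInputStream(loader);
    assertEquals(nextLongDecimalValue(stream), BigInteger.valueOf(Long.MAX_VALUE));
    assertEquals(nextLongDecimalValue(stream), BigInteger.valueOf(Long.MAX_VALUE));
}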