Use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.
The class ElasticsearchMetadata, method toPrestoType.
private Type toPrestoType(IndexMetadata.Field metaDataField, boolean isArray) {
    IndexMetadata.Type type = metaDataField.getType();
    if (isArray) {
        Type elementType = toPrestoType(metaDataField, false);
        return new ArrayType(elementType);
    }
    if (type instanceof PrimitiveType) {
        switch (((PrimitiveType) type).getName()) {
            case "float":
                return REAL;
            case "double":
                return DOUBLE;
            case "byte":
                return TINYINT;
            case "short":
                return SMALLINT;
            case "integer":
                return INTEGER;
            case "long":
                return BIGINT;
            case "string":
            case "text":
            case "keyword":
                return VARCHAR;
            case "ip":
                return ipAddressType;
            case "boolean":
                return BOOLEAN;
            case "binary":
                return VARBINARY;
            default:
                break;
        }
    }
    else if (type instanceof DateTimeType) {
        if (((DateTimeType) type).getFormats().isEmpty()) {
            return TIMESTAMP;
        }
        // otherwise, skip -- we don't support custom formats, yet
    }
    else if (type instanceof ObjectType) {
        ObjectType objectType = (ObjectType) type;
        List<RowType.Field> fields = objectType.getFields().stream()
                .map(field -> RowType.field(field.getName(), toPrestoType(field)))
                .collect(toImmutableList());
        return RowType.from(fields);
    }
    return null;
}
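For orientation, the sketch below isolates just the primitive-name mapping that drives the INTEGER case above. It is a standalone illustration, not the connector's code: it assumes only the io.prestosql.spi.type constants already referenced, takes the Elasticsearch type name as a plain String instead of the connector's IndexMetadata model, and omits the "ip", date, and object handling (the class name is hypothetical).

import io.prestosql.spi.type.Type;

import static io.prestosql.spi.type.BigintType.BIGINT;
import static io.prestosql.spi.type.BooleanType.BOOLEAN;
import static io.prestosql.spi.type.DoubleType.DOUBLE;
import static io.prestosql.spi.type.IntegerType.INTEGER;
import static io.prestosql.spi.type.RealType.REAL;
import static io.prestosql.spi.type.SmallintType.SMALLINT;
import static io.prestosql.spi.type.TinyintType.TINYINT;
import static io.prestosql.spi.type.VarbinaryType.VARBINARY;
import static io.prestosql.spi.type.VarcharType.VARCHAR;

final class ElasticsearchTypeMappingSketch {
    private ElasticsearchTypeMappingSketch() {}

    // Maps an Elasticsearch primitive type name to the corresponding Presto type,
    // mirroring the switch in toPrestoType above; returns null for unsupported names.
    static Type primitiveToPrestoType(String esTypeName) {
        switch (esTypeName) {
            case "float":
                return REAL;
            case "double":
                return DOUBLE;
            case "byte":
                return TINYINT;
            case "short":
                return SMALLINT;
            case "integer":
                return INTEGER;
            case "long":
                return BIGINT;
            case "string":
            case "text":
            case "keyword":
                return VARCHAR;
            case "boolean":
                return BOOLEAN;
            case "binary":
                return VARBINARY;
            default:
                return null;
        }
    }
}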
Use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.
The class TestMergingPageIterator, method testMerging.
@Test
public void testMerging() {
    List<Type> types = ImmutableList.of(INTEGER, INTEGER);
    List<Integer> sortIndexes = ImmutableList.of(1);
    List<SortOrder> sortOrders = ImmutableList.of(SortOrder.ASC_NULLS_FIRST);
    List<List<Page>> pageLists = new ArrayList<>();
    PageBuilder pageBuilder = new PageBuilder(types);
    for (int i = 0; i < 10; i++) {
        Iterator<Integer> values = IntStream.range(0, 1000)
                .map(ignored -> ThreadLocalRandom.current().nextInt(100_000))
                .mapToObj(n -> ((n % 100) == 0) ? null : n)
                .sorted(nullsFirst(naturalOrder()))
                .iterator();
        List<Page> pages = new ArrayList<>();
        for (int j = 0; j < 10; j++) {
            for (int k = 0; k < 100; k++) {
                Integer n = values.next();
                pageBuilder.declarePosition();
                if (n == null) {
                    pageBuilder.getBlockBuilder(0).appendNull();
                    pageBuilder.getBlockBuilder(1).appendNull();
                } else {
                    INTEGER.writeLong(pageBuilder.getBlockBuilder(0), n);
                    INTEGER.writeLong(pageBuilder.getBlockBuilder(1), n * 22L);
                }
            }
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
        pageLists.add(pages);
        assertFalse(values.hasNext());
    }
    List<Iterator<Page>> pages = pageLists.stream().map(List::iterator).collect(toList());
    Iterator<Page> iterator = new MergingPageIterator(pages, types, sortIndexes, sortOrders);
    List<Long> values = new ArrayList<>();
    while (iterator.hasNext()) {
        Page page = iterator.next();
        for (int i = 0; i < page.getPositionCount(); i++) {
            if (page.getBlock(0).isNull(i)) {
                assertTrue(page.getBlock(1).isNull(i));
                values.add(null);
            } else {
                long x = INTEGER.getLong(page.getBlock(0), i);
                long y = INTEGER.getLong(page.getBlock(1), i);
                assertEquals(y, x * 22);
                values.add(x);
            }
        }
    }
    assertThat(values).isSortedAccordingTo(nullsFirst(naturalOrder()));
}
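The test above relies on one idiom worth calling out: INTEGER is a fixed-width type whose values are written into blocks with INTEGER.writeLong and read back as Java longs with INTEGER.getLong. A minimal round-trip sketch of just that, assuming only the io.prestosql SPI and Guava already on the test's classpath (the class name IntegerPageRoundTrip is illustrative, not part of the project):

import com.google.common.collect.ImmutableList;
import io.prestosql.spi.Page;
import io.prestosql.spi.PageBuilder;

import static io.prestosql.spi.type.IntegerType.INTEGER;

public class IntegerPageRoundTrip {
    public static void main(String[] args) {
        // Build a single-column INTEGER page: declare each position, then write
        // the value through the type, exactly as the test does.
        PageBuilder pageBuilder = new PageBuilder(ImmutableList.of(INTEGER));
        for (int value : new int[] {1, 2, 3}) {
            pageBuilder.declarePosition();
            INTEGER.writeLong(pageBuilder.getBlockBuilder(0), value);
        }
        Page page = pageBuilder.build();
        // Read the values back; INTEGER surfaces them as longs.
        for (int position = 0; position < page.getPositionCount(); position++) {
            System.out.println(INTEGER.getLong(page.getBlock(0), position));
        }
    }
}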
Use of io.prestosql.spi.type.IntegerType.INTEGER in project boostkit-bigdata by kunpengcompute.
The class HiveFileFormatBenchmark, method createTpchDataSet.
private static <E extends TpchEntity> TestData createTpchDataSet(FileFormat format, TpchTable<E> tpchTable, List<TpchColumn<E>> columns) {
    List<String> columnNames = columns.stream().map(TpchColumn::getColumnName).collect(toList());
    List<Type> columnTypes = columns.stream()
            .map(HiveFileFormatBenchmark::getColumnType)
            .map(type -> format.supportsDate() || !DATE.equals(type) ? type : createUnboundedVarcharType())
            .collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    long dataSize = 0;
    for (E row : tpchTable.createGenerator(10, 1, 1)) {
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            TpchColumn<E> column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch (column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(row));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(row));
                    break;
                case DATE:
                    if (format.supportsDate()) {
                        DATE.writeLong(blockBuilder, column.getDate(row));
                    } else {
                        createUnboundedVarcharType().writeString(blockBuilder, column.getString(row));
                    }
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(row));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(row)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
        if (pageBuilder.isFull()) {
            Page page = pageBuilder.build();
            pages.add(page);
            pageBuilder.reset();
            dataSize += page.getSizeInBytes();
            if (dataSize >= MIN_DATA_SIZE) {
                break;
            }
        }
    }
    return new TestData(columnNames, columnTypes, pages.build());
}
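One detail worth isolating is the DATE fallback: when the target FileFormat cannot store dates, the column type is rewritten to an unbounded VARCHAR and the value is written as its string form. A hedged sketch of just that decision, assuming only the DATE constant and createUnboundedVarcharType() already used above (the helper name effectiveColumnType is illustrative):

import io.prestosql.spi.type.Type;

import static io.prestosql.spi.type.DateType.DATE;
import static io.prestosql.spi.type.VarcharType.createUnboundedVarcharType;

// Substitute VARCHAR for DATE when the file format cannot encode dates natively.
static Type effectiveColumnType(Type declaredType, boolean formatSupportsDate) {
    if (DATE.equals(declaredType) && !formatSupportsDate) {
        return createUnboundedVarcharType();
    }
    return declaredType;
}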
Use of io.prestosql.spi.type.IntegerType.INTEGER in project boostkit-bigdata by kunpengcompute.
The class HiveUtil, method createRecordReader.
public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, Map<String, String> customSplitInfo) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == HiveColumnHandle.ColumnType.REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));
    // Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
    setReadColumns(configuration, readHiveColumnIndexes);
    // Only propagate serialization schema configs by default
    Predicate<String> schemaFilter = schemaProperty -> schemaProperty.startsWith("serialization.");
    JobConf jobConf = ConfigurationUtils.toJobConf(configuration);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true, jobConf);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);
    if (!customSplitInfo.isEmpty() && isHudiRealtimeSplit(customSplitInfo)) {
        fileSplit = recreateSplitWithCustomInfo(fileSplit, customSplitInfo);
        // Add additional column information for record reader
        List<String> readHiveColumnNames = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getName));
        jobConf.set(READ_COLUMN_NAMES_CONF_STR, Joiner.on(',').join(readHiveColumnNames));
        // Remove filter when using customSplitInfo as the record reader requires complete schema configs
        schemaFilter = schemaProperty -> true;
    }
    schema.stringPropertyNames().stream().filter(schemaFilter).forEach(name -> jobConf.set(name, schema.getProperty(name)));
    // add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
    List<String> codecs = newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
    if (!codecs.contains(LzoCodec.class.getName())) {
        codecs.add(0, LzoCodec.class.getName());
    }
    if (!codecs.contains(LzopCodec.class.getName())) {
        codecs.add(0, LzopCodec.class.getName());
    }
    jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));
    try {
        RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
        int headerCount = getHeaderCount(schema);
        if (headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
        }
        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }
        return recordReader;
    } catch (IOException e) {
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }
        throw new PrestoException(HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())), e);
    }
}
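The io.compression.codecs handling above is a small, self-contained pattern: parse the existing comma-separated codec list, prepend a codec class name if it is absent, and write the joined list back to the JobConf. A standalone sketch of just that string manipulation, using the same Guava Splitter and stream joining collector the method uses (the helper name prependCodec is illustrative, and the codec class name is passed in rather than imported):

import com.google.common.base.Splitter;

import java.util.List;

import static com.google.common.collect.Lists.newArrayList;
import static java.util.stream.Collectors.joining;

// Prepend codecClassName to a comma-separated codec list if it is not already present.
static String prependCodec(String existingCodecs, String codecClassName) {
    List<String> codecs = newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(existingCodecs));
    if (!codecs.contains(codecClassName)) {
        codecs.add(0, codecClassName);
    }
    return codecs.stream().collect(joining(","));
}

A caller would then do something like jobConf.set("io.compression.codecs", prependCodec(jobConf.get("io.compression.codecs", ""), LzopCodec.class.getName())), which is what the two if blocks above accomplish inline.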
Use of io.prestosql.spi.type.IntegerType.INTEGER in project boostkit-bigdata by kunpengcompute.
The class TestHivePageSink, method writeTestFile.
private static long writeTestFile(HiveConfig config, HiveMetastore metastore, String outputPath) {
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    HiveWriterStats stats = new HiveWriterStats();
    ConnectorPageSink pageSink = createPageSink(transaction, config, metastore, new Path("file:///" + outputPath), stats);
    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream()
            .map(LineItemColumn::getType)
            .map(TestHivePageSink::getHiveType)
            .map(hiveType -> hiveType.getType(HiveTestUtils.TYPE_MANAGER))
            .collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    int rows = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        rows++;
        if (rows >= NUM_ROWS) {
            break;
        }
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            LineItemColumn column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch (column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(blockBuilder, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }
    Page page = pageBuilder.build();
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());
    File outputDir = new File(outputPath);
    List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
    File outputFile = getOnlyElement(files);
    long length = outputFile.length();
    ConnectorPageSource pageSource = createPageSource(transaction, config, outputFile);
    List<Page> pages = new ArrayList<>();
    while (!pageSource.isFinished()) {
        Page nextPage = pageSource.getNextPage();
        if (nextPage != null) {
            pages.add(nextPage.getLoadedPage());
        }
    }
    MaterializedResult expectedResults = toMaterializedResult(getSession(config), columnTypes, ImmutableList.of(page));
    MaterializedResult results = toMaterializedResult(getSession(config), columnTypes, pages);
    assertEquals(results, expectedResults);
    assertEquals(round(stats.getInputPageSizeInBytes().getAllTime().getMax()), page.getRetainedSizeInBytes());
    return length;
}
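The read-back loop at the end of this test follows the usual ConnectorPageSource draining pattern: poll getNextPage() until isFinished(), skip null pages, and force lazy blocks with getLoadedPage() before comparing results. A minimal sketch of that loop extracted into a helper (the helper name drainPages is illustrative, not part of the project):

import io.prestosql.spi.Page;
import io.prestosql.spi.connector.ConnectorPageSource;

import java.util.ArrayList;
import java.util.List;

// Collect every page a page source produces, materializing lazy blocks along the way.
static List<Page> drainPages(ConnectorPageSource pageSource) {
    List<Page> pages = new ArrayList<>();
    while (!pageSource.isFinished()) {
        Page nextPage = pageSource.getNextPage();
        if (nextPage != null) {
            pages.add(nextPage.getLoadedPage());
        }
    }
    return pages;
}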