Use of io.trino.plugin.hive.HiveType.HIVE_LONG in project trino by trinodb.
From class AbstractTestHive, method prepareInvalidBuckets:
private void prepareInvalidBuckets(HiveStorageFormat storageFormat, SchemaTableName tableName)
        throws Exception
{
    // Create an empty table bucketed by "id" into 8 buckets using bucketing V1
    createEmptyTable(
            tableName,
            storageFormat,
            ImmutableList.of(
                    new Column("id", HIVE_LONG, Optional.empty()),
                    new Column("name", HIVE_STRING, Optional.empty())),
            ImmutableList.of(),
            Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 8, ImmutableList.of())));

    // Insert 100 rows so every bucket file receives data
    MaterializedResult.Builder dataBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR);
    for (long id = 0; id < 100; id++) {
        dataBuilder.row(id, String.valueOf(id));
    }
    insertData(tableName, dataBuilder.build());

    // Corrupt the bucket layout: delete the file for bucket 2 and rename the file for
    // bucket 5 into its place, so the bucket number in the file name no longer matches its contents
    try (Transaction transaction = newTransaction()) {
        Set<String> files = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        Path bucket2 = files.stream().map(Path::new).filter(path -> path.getName().startsWith("000002_0_")).collect(onlyElement());
        Path bucket5 = files.stream().map(Path::new).filter(path -> path.getName().startsWith("000005_0_")).collect(onlyElement());
        HdfsContext context = new HdfsContext(newSession());
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, bucket2);
        fileSystem.delete(bucket2, false);
        fileSystem.rename(bucket5, bucket2);
    }
}
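After this setup the table still declares 8 buckets, but only seven distinct bucket file names remain, and the file named for bucket 2 actually holds rows that hash to bucket 5. The following is a minimal, hypothetical sketch of how such a gap in the bucket file numbering could be spotted from file names alone; BucketFileCheck, BUCKET_FILE, and missingBuckets are illustrative names, not Trino APIs, and assume the zero-padded "00000N_0_<suffix>" naming seen above.

import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class BucketFileCheck
{
    // Bucket files in the snippet above start with a zero-padded bucket number, e.g. "000002_0_"
    private static final Pattern BUCKET_FILE = Pattern.compile("^(\\d{6})_0_.*");

    // Returns the bucket numbers for which no data file is present
    public static Set<Integer> missingBuckets(List<String> fileNames, int declaredBucketCount)
    {
        Set<Integer> present = new TreeSet<>();
        for (String name : fileNames) {
            Matcher matcher = BUCKET_FILE.matcher(name);
            if (matcher.matches()) {
                present.add(Integer.parseInt(matcher.group(1)));
            }
        }
        Set<Integer> missing = new TreeSet<>();
        for (int bucket = 0; bucket < declaredBucketCount; bucket++) {
            if (!present.contains(bucket)) {
                missing.add(bucket);
            }
        }
        return missing;
    }

    public static void main(String[] args)
    {
        // After prepareInvalidBuckets, bucket 5's file name is gone (it was renamed onto bucket 2)
        List<String> files = List.of("000000_0_abc", "000001_0_abc", "000002_0_abc", "000003_0_abc",
                "000004_0_abc", "000006_0_abc", "000007_0_abc");
        System.out.println(missingBuckets(files, 8)); // prints [5]
    }
}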
Use of io.trino.plugin.hive.HiveType.HIVE_LONG in project trino by trinodb.
From class TestHivePartitionedBucketFunction, method testMultiplePartitions:
@Test(dataProvider = "hiveBucketingVersion")
public void testMultiplePartitions(BucketingVersion hiveBucketingVersion)
{
    int numValues = 1024;
    int numBuckets = 10;
    Block bucketColumn = createLongSequenceBlockWithNull(numValues);
    Page bucketedColumnPage = new Page(bucketColumn);
    BucketFunction hiveBucketFunction = bucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG));

    // Build a partition column with 7 distinct values plus a null partition,
    // each covering numValues / numPartitions consecutive positions
    int numPartitions = 8;
    List<Long> partitionValues = new ArrayList<>();
    for (int i = 0; i < numPartitions - 1; i++) {
        partitionValues.addAll(Collections.nCopies(numValues / numPartitions, i * 348349L));
    }
    partitionValues.addAll(Collections.nCopies(numValues / numPartitions, null));
    Block partitionColumn = createLongsBlock(partitionValues);
    Page page = new Page(bucketColumn, partitionColumn);

    Map<Long, HashMultimap<Integer, Integer>> partitionedBucketPositions = new HashMap<>();
    for (int i = 0; i < numValues; i++) {
        int hiveBucket = hiveBucketFunction.getBucket(bucketedColumnPage, i);
        Long hivePartition = partitionValues.get(i);
        // record the list of positions for each combination of hive partition and bucket
        partitionedBucketPositions.computeIfAbsent(hivePartition, ignored -> HashMultimap.create()).put(hiveBucket, i);
    }

    BucketFunction hivePartitionedBucketFunction = partitionedBucketFunction(hiveBucketingVersion, numBuckets, ImmutableList.of(HIVE_LONG), ImmutableList.of(BIGINT), 4000);

    // All positions of a hive partition and bucket should hash to the same partitioned bucket
    for (Map.Entry<Long, HashMultimap<Integer, Integer>> partitionEntry : partitionedBucketPositions.entrySet()) {
        for (Map.Entry<Integer, Collection<Integer>> entry : partitionEntry.getValue().asMap().entrySet()) {
            assertBucketCount(hivePartitionedBucketFunction, page, entry.getValue(), 1);
        }
    }
    // Across all positions, each (partition, bucket) combination should map to its own partitioned bucket
    assertBucketCount(hivePartitionedBucketFunction, page, IntStream.range(0, numValues).boxed().collect(toImmutableList()), numBuckets * numPartitions);
}
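For reference, the per-value hashing that the single-column HIVE_LONG bucket function above ultimately relies on is simple for bucketing V1. The sketch below is an assumption based on Hive's V1 behavior as commonly described (hash the long like Long.hashCode, then reduce it modulo the bucket count), not a copy of the test's bucketFunction helper; Trino's real logic lives in io.trino.plugin.hive.util.HiveBucketing and also handles multiple columns, nulls, and bucketing V2.

public class HiveBucketV1Sketch
{
    // Hedged sketch: Hive V1 bucket assignment for a single BIGINT value
    static int bucketV1(long value, int bucketCount)
    {
        int hash = (int) ((value >>> 32) ^ value); // same result as Long.hashCode(value)
        return (hash & Integer.MAX_VALUE) % bucketCount;
    }

    public static void main(String[] args)
    {
        // With 10 buckets, as in the test, small sequential ids spread across buckets 0-9
        for (long id = 0; id < 5; id++) {
            System.out.println(id + " -> bucket " + bucketV1(id, 10));
        }
    }
}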