Use of com.facebook.presto.hive.HivePartition in project presto by prestodb.
Class MetastoreHiveStatisticsProvider, method getPartitionsSample.
@VisibleForTesting
static List<HivePartition> getPartitionsSample(List<HivePartition> partitions, int sampleSize)
{
    checkArgument(sampleSize > 0, "sampleSize is expected to be greater than zero");

    if (partitions.size() <= sampleSize) {
        return partitions;
    }

    List<HivePartition> result = new ArrayList<>();
    int samplesLeft = sampleSize;

    // always include the partitions with the smallest and the largest partition id
    HivePartition min = partitions.get(0);
    HivePartition max = partitions.get(0);
    for (HivePartition partition : partitions) {
        if (partition.getPartitionId().compareTo(min.getPartitionId()) < 0) {
            min = partition;
        }
        else if (partition.getPartitionId().compareTo(max.getPartitionId()) > 0) {
            max = partition;
        }
    }

    result.add(min);
    samplesLeft--;
    if (samplesLeft > 0) {
        result.add(max);
        samplesLeft--;
    }

    if (samplesLeft > 0) {
        // fill the remaining slots deterministically: order the remaining partitions by the
        // murmur3 hash of their partition id and take the first samplesLeft of them
        HashFunction hashFunction = murmur3_128();
        Comparator<Map.Entry<HivePartition, Long>> hashComparator = Comparator
                .<Map.Entry<HivePartition, Long>, Long>comparing(Map.Entry::getValue)
                .thenComparing(entry -> entry.getKey().getPartitionId());
        partitions.stream()
                .filter(partition -> !result.contains(partition))
                .map(partition -> immutableEntry(partition, hashFunction.hashUnencodedChars(partition.getPartitionId()).asLong()))
                .sorted(hashComparator)
                .limit(samplesLeft)
                .forEachOrdered(entry -> result.add(entry.getKey()));
    }

    return unmodifiableList(result);
}
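Because the sample is filled by hash order rather than random choice, the same partition list and sample size always yield the same sample, which keeps the derived statistics stable across repeated planning. Below is a minimal, self-contained sketch of that hash-ordering idea applied to plain partition-id strings; the class and method names (PartitionSampleSketch, samplePartitionIds) are illustrative only, and the sketch omits the min/max seeding that the real method performs first.

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

public class PartitionSampleSketch
{
    // Deterministically pick up to sampleSize partition ids by murmur3 hash order;
    // this mirrors only the hash-ordering tail of getPartitionsSample.
    static List<String> samplePartitionIds(List<String> partitionIds, int sampleSize)
    {
        if (partitionIds.size() <= sampleSize) {
            return partitionIds;
        }
        HashFunction hashFunction = Hashing.murmur3_128();
        return partitionIds.stream()
                .sorted(Comparator.comparingLong((String id) -> hashFunction.hashUnencodedChars(id).asLong())
                        .thenComparing(Comparator.naturalOrder()))
                .limit(sampleSize)
                .collect(Collectors.toList());
    }

    public static void main(String[] args)
    {
        List<String> ids = Arrays.asList("ds=2024-01-01", "ds=2024-01-02", "ds=2024-01-03", "ds=2024-01-04");
        // the same input always produces the same sample, regardless of list order
        System.out.println(samplePartitionIds(ids, 2));
    }
}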
Use of com.facebook.presto.hive.HivePartition in project carbondata by apache.
Class CarbondataSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session,
        ConnectorTableLayoutHandle layoutHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    HiveTableLayoutHandle layout = (HiveTableLayoutHandle) layoutHandle;
    SchemaTableName schemaTableName = layout.getSchemaTableName();
    carbonTableReader.setPrestoQueryId(session.getQueryId());
    // get table metadata
    SemiTransactionalHiveMetastore metastore =
            metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    // non-carbon tables are delegated to the default Hive split manager
    if (!table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
        return super.getSplits(transactionHandle, session, layoutHandle, splitSchedulingStrategy);
    }
    // for hive metastore, get the table location from the catalog table's tablePath
    String location = table.getStorage().getSerdeParameters().get("tablePath");
    if (StringUtils.isEmpty(location)) {
        // in the file metastore case tablePath can be null, so fall back to the storage location
        location = table.getStorage().getLocation();
    }

    // build the list of partition directories that survived partition pruning in the layout
    List<PartitionSpec> filteredPartitions = new ArrayList<>();
    if (layout.getPartitionColumns().size() > 0 && layout.getPartitions().isPresent()) {
        List<String> colNames = layout.getPartitionColumns().stream()
                .map(x -> ((HiveColumnHandle) x).getName())
                .collect(Collectors.toList());
        for (HivePartition partition : layout.getPartitions().get()) {
            filteredPartitions.add(new PartitionSpec(colNames,
                    location + CarbonCommonConstants.FILE_SEPARATOR + partition.getPartitionId()));
        }
    }

    String queryId = System.nanoTime() + "";
    QueryStatistic statistic = new QueryStatistic();
    QueryStatisticsRecorder statisticRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    statistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis());
    statisticRecorder.recordStatisticsForDriver(statistic, queryId);
    statistic = new QueryStatistic();
    carbonTableReader.setQueryId(queryId);

    TupleDomain<HiveColumnHandle> predicate =
            (TupleDomain<HiveColumnHandle>) layout.getCompactEffectivePredicate();
    Configuration configuration = this.hdfsEnvironment.getConfiguration(
            new HdfsEnvironment.HdfsContext(session, schemaTableName.getSchemaName(), schemaTableName.getTableName()),
            new Path(location));
    configuration = carbonTableReader.updateS3Properties(configuration);
    for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
        configuration.set(entry.getKey(), entry.getValue());
    }
    // set the hadoop configuration to thread local, so that FileFactory can use it
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);

    CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(schemaTableName, location, configuration);
    Expression filters = PrestoFilterUtil.parseFilterExpression(predicate);
    try {
        List<CarbonLocalMultiBlockSplit> splits =
                carbonTableReader.getInputSplits(cache, filters, filteredPartitions, configuration);
        ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
        long index = 0;
        for (CarbonLocalMultiBlockSplit split : splits) {
            index++;
            Properties properties = new Properties();
            for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
                properties.setProperty(entry.getKey(), entry.getValue());
            }
            // carry the carbon split metadata to the workers through the HiveSplit schema properties
            properties.setProperty("tablePath", cache.getCarbonTable().getTablePath());
            properties.setProperty("carbonSplit", split.getJsonString());
            properties.setProperty("queryId", queryId);
            properties.setProperty("index", String.valueOf(index));
            cSplits.add(new HiveSplit(schemaTableName.getSchemaName(), schemaTableName.getTableName(),
                    schemaTableName.getTableName(), "", 0, 0, 0, properties, new ArrayList<>(),
                    getHostAddresses(split.getLocations()), OptionalInt.empty(), false, predicate,
                    new HashMap<>(), Optional.empty(), false));
        }
        statisticRecorder.logStatisticsAsTableDriver();
        statistic.addStatistics(QueryStatisticsConstants.BLOCK_IDENTIFICATION, System.currentTimeMillis());
        statisticRecorder.recordStatisticsForDriver(statistic, queryId);
        statisticRecorder.logStatisticsAsTableDriver();
        return new FixedSplitSource(cSplits.build());
    } catch (Exception ex) {
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
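The partition pruning above relies on Hive-style partition ids doubling as directory names under the table location, so each PartitionSpec simply points at location + separator + partitionId. A minimal sketch of that path construction, assuming CarbonCommonConstants.FILE_SEPARATOR is the usual "/" and using purely illustrative values:

import java.util.Arrays;
import java.util.List;

public class PartitionPathSketch {
    public static void main(String[] args) {
        // illustrative values; the real ones come from the metastore Table and the layout's HivePartitions
        String location = "hdfs://nn:8020/warehouse/sales";   // tablePath or storage location
        String fileSeparator = "/";                            // stands in for CarbonCommonConstants.FILE_SEPARATOR
        List<String> partitionIds = Arrays.asList("country=US/dt=2024-01-01", "country=DE/dt=2024-01-01");
        for (String partitionId : partitionIds) {
            // each PartitionSpec built in getSplits points at exactly this directory
            System.out.println(location + fileSeparator + partitionId);
        }
    }
}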