Use of io.prestosql.plugin.hive.authentication.HiveIdentity in the Apache CarbonData project.
Class CarbondataSplitManager, method getSplits.
/**
 * Builds the split source for a table scan.
 *
 * <p>Tables whose input format name does not contain {@code "carbon"} are delegated to the
 * parent implementation. For carbon tables the table location is resolved, the selected
 * partitions are converted to {@link PartitionSpec}s, the Hadoop configuration is prepared and
 * published to the current thread, and one {@link HiveSplit} is emitted per
 * {@link CarbonLocalMultiBlockSplit} returned by the table reader.
 *
 * @param transactionHandle transaction used to obtain the metastore
 * @param session current connector session
 * @param tableHandle handle of the table being scanned; cast to {@link HiveTableHandle}
 * @param splitSchedulingStrategy forwarded unchanged when delegating to the parent
 * @return a fixed split source holding all generated splits
 * @throws TableNotFoundException if the metastore has no entry for the table
 * @throws RuntimeException wrapping any exception raised while computing the splits
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
    HiveTableHandle carbonHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = carbonHandle.getSchemaTableName();
    carbonTableReader.setPrestoQueryId(session.getQueryId());

    // Look the table up in the metastore bound to this transaction.
    SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metastore
        .getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
        .orElseThrow(() -> new TableNotFoundException(tableName));

    boolean carbonFormat = table.getStorage().getStorageFormat().getInputFormat().contains("carbon");
    if (!carbonFormat) {
        return super.getSplits(transactionHandle, session, tableHandle, splitSchedulingStrategy);
    }

    // With a hive metastore the location comes from the serde parameter "tablePath";
    // with a file metastore that parameter can be missing, so fall back to the storage location.
    Map<String, String> serdeParameters = table.getStorage().getSerdeParameters();
    String tablePathParameter = serdeParameters.get("tablePath");
    String location = StringUtils.isEmpty(tablePathParameter)
        ? table.getStorage().getLocation()
        : tablePathParameter;

    // Translate the partitions carried by the handle (if any) into partition specs.
    List<PartitionSpec> partitionSpecs = new ArrayList<>();
    if (!carbonHandle.getPartitionColumns().isEmpty() && carbonHandle.getPartitions().isPresent()) {
        List<String> partitionColumnNames = carbonHandle.getPartitionColumns()
            .stream()
            .map(HiveColumnHandle::getName)
            .collect(Collectors.toList());
        String partitionRoot = location + CarbonCommonConstants.FILE_SEPARATOR;
        for (HivePartition hivePartition : carbonHandle.getPartitions().get()) {
            partitionSpecs.add(new PartitionSpec(partitionColumnNames, partitionRoot + hivePartition.getPartitionId()));
        }
    }

    // Driver-side statistics: record the block-allocation timestamp under a fresh query id.
    String queryId = String.valueOf(System.nanoTime());
    QueryStatistic allocationStatistic = new QueryStatistic();
    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    allocationStatistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis());
    recorder.recordStatisticsForDriver(allocationStatistic, queryId);
    // Created here (not later) so the object is instantiated at the same point as before.
    QueryStatistic identificationStatistic = new QueryStatistic();
    carbonTableReader.setQueryId(queryId);

    TupleDomain<HiveColumnHandle> predicate = carbonHandle.getCompactEffectivePredicate();

    HdfsEnvironment.HdfsContext hdfsContext = new HdfsEnvironment.HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
    Configuration hdfsConfiguration = this.hdfsEnvironment.getConfiguration(hdfsContext, new Path(location));
    Configuration configuration = carbonTableReader.updateS3Properties(hdfsConfiguration);
    serdeParameters.forEach((key, value) -> configuration.set(key, value));

    // Publish the hadoop configuration as a thread local so that FileFactory can use it.
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);
    CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(tableName, location, configuration);
    Expression filters = PrestoFilterUtil.parseFilterExpression(predicate);

    try {
        List<CarbonLocalMultiBlockSplit> carbonSplits = carbonTableReader.getInputSplits(cache, filters, partitionSpecs, configuration);
        ImmutableList.Builder<ConnectorSplit> connectorSplits = ImmutableList.builder();
        long splitIndex = 0;
        for (CarbonLocalMultiBlockSplit carbonSplit : carbonSplits) {
            // The index written into the split properties is 1-based.
            splitIndex++;
            Properties splitProperties = new Properties();
            serdeParameters.forEach((key, value) -> splitProperties.setProperty(key, value));
            String tablePath = cache.getCarbonTable().getTablePath();
            splitProperties.setProperty("tablePath", tablePath);
            splitProperties.setProperty("carbonSplit", carbonSplit.getJsonString());
            splitProperties.setProperty("queryId", queryId);
            splitProperties.setProperty("index", String.valueOf(splitIndex));
            connectorSplits.add(new HiveSplit(
                tableName.getSchemaName(),
                tableName.getTableName(),
                tableName.getTableName(),
                tablePath,
                0,
                0,
                0,
                0,
                splitProperties,
                new ArrayList<>(),
                getHostAddresses(carbonSplit.getLocations()),
                OptionalInt.empty(),
                false,
                TableToPartitionMapping.empty(),
                Optional.empty(),
                false,
                Optional.empty()));
        }

        recorder.logStatisticsAsTableDriver();
        identificationStatistic.addStatistics(QueryStatisticsConstants.BLOCK_IDENTIFICATION, System.currentTimeMillis());
        recorder.recordStatisticsForDriver(identificationStatistic, queryId);
        recorder.logStatisticsAsTableDriver();
        return new FixedSplitSource(connectorSplits.build());
    } catch (Exception ex) {
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
Use of io.prestosql.plugin.hive.authentication.HiveIdentity in the Apache CarbonData project.
Class CarbonDataPageSinkProvider, method createPageSink.
/**
 * Creates the page sink used to write pages for the given writable table handle.
 *
 * <p>Bucket count and sort columns are taken from the handle's bucket property when one is
 * present; otherwise an empty bucket count and an empty sort list are used. A
 * {@link CarbonDataWriterFactory} is built from the handle, the session and this provider's
 * collaborators, and wrapped in a {@link HivePageSink}.
 *
 * @param handle writable table handle describing the target table
 * @param session current connector session
 * @param additionalTableParameters extra table parameters passed to the writer factory
 * @param additionalConf extra configuration entries passed to the writer factory
 * @param isCreateTable flag forwarded to the writer factory
 * @return the page sink backed by the carbon writer factory
 */
private ConnectorPageSink createPageSink(HiveWritableTableHandle handle, ConnectorSession session, Map<String, String> additionalTableParameters, Map<String, String> additionalConf, boolean isCreateTable) {
    boolean bucketed = handle.getBucketProperty().isPresent();
    OptionalInt bucketCount = bucketed
        ? OptionalInt.of(handle.getBucketProperty().get().getBucketCount())
        : OptionalInt.empty();
    List<SortingColumn> sortedBy = bucketed
        ? handle.getBucketProperty().get().getSortedBy()
        : ImmutableList.of();

    // Metadata access for the sink goes through a memoizing wrapper around the metastore.
    HiveMetastoreClosure metastoreClosure = new HiveMetastoreClosure(memoizeMetastore(metastore, perTransactionMetastoreCacheMaximumSize));
    HivePageSinkMetadataProvider metadataProvider = new HivePageSinkMetadataProvider(handle.getPageSinkMetadata(), metastoreClosure, new HiveIdentity(session));

    CarbonDataWriterFactory writerFactory = new CarbonDataWriterFactory(
        fileWriterFactories,
        handle.getSchemaName(),
        handle.getTableName(),
        isCreateTable,
        handle.getInputColumns(),
        handle.getTableStorageFormat(),
        handle.getPartitionStorageFormat(),
        additionalTableParameters,
        bucketCount,
        sortedBy,
        handle.getLocationHandle(),
        locationService,
        session.getQueryId(),
        metadataProvider,
        typeManager,
        hdfsEnvironment,
        pageSorter,
        writerSortBufferSize,
        maxOpenSortFiles,
        immutablePartitions,
        session,
        nodeManager,
        eventClient,
        hiveSessionProperties,
        hiveWriterStats,
        additionalConf);

    return new HivePageSink(
        writerFactory,
        handle.getInputColumns(),
        handle.getBucketProperty(),
        pageIndexerFactory,
        hdfsEnvironment,
        maxOpenPartitions,
        writeVerificationExecutor,
        partitionUpdateCodec,
        session);
}
Use of io.prestosql.plugin.hive.authentication.HiveIdentity in the Apache CarbonData project.
Class CarbonDataMetaData, method beginInsert.
/**
 * Starts an insert into a table and prepares the carbon output committer for it.
 *
 * <p>After the parent implementation has produced the hive insert handle, this method builds a
 * {@link JobConf} for the write path, derives a {@link CarbonLoadModel} from the table's hive
 * schema, sets up a {@link MapredCarbonOutputCommitter} job, publishes the job configuration to
 * the current thread, and returns a {@link CarbonDataInsertTableHandle} that carries the
 * serialized load model.
 *
 * @param session current connector session
 * @param tableHandle handle of the table to insert into
 * @return the insert handle, including the load model under
 *     {@code CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL}
 * @throws IllegalStateException if the metastore no longer returns the table
 * @throws RuntimeException wrapping the {@link IOException} raised while creating the load model
 *     or setting up the output committer
 */
@Override
public CarbonDataInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle) {
    HiveInsertTableHandle hiveInsertTableHandle = super.beginInsert(session, tableHandle);
    SchemaTableName tableName = hiveInsertTableHandle.getSchemaTableName();
    // Fail with a descriptive message instead of a bare NoSuchElementException from Optional.get().
    Table table = this.metastore
        .getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
        .orElseThrow(() -> new IllegalStateException("Table not found while beginning insert: " + tableName));
    Path outputPath = new Path(hiveInsertTableHandle.getLocationHandle().getJsonSerializableTargetPath());
    JobConf jobConf = ConfigurationUtils.toJobConf(this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, hiveInsertTableHandle.getSchemaName(), hiveInsertTableHandle.getTableName()), new Path(hiveInsertTableHandle.getLocationHandle().getJsonSerializableWritePath())));
    jobConf.set("location", outputPath.toString());
    Properties hiveSchema = MetastoreUtil.getHiveSchema(table);
    try {
        CarbonLoadModel carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(hiveSchema, jobConf);
        CarbonTableOutputFormat.setLoadModel(jobConf, carbonLoadModel);
    } catch (IOException ex) {
        LOG.error("Error while creating carbon load model", ex);
        throw new RuntimeException(ex);
    }
    try {
        carbonOutputCommitter = new MapredCarbonOutputCommitter();
        jobContext = new JobContextImpl(jobConf, new JobID());
        carbonOutputCommitter.setupJob(jobContext);
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(jobConf);
    } catch (IOException e) {
        LOG.error("error setting the output committer", e);
        // Keep the original IOException as the cause so the stack trace is not lost.
        throw new RuntimeException("error setting the output committer", e);
    }
    return new CarbonDataInsertTableHandle(
        hiveInsertTableHandle.getSchemaTableName().getSchemaName(),
        hiveInsertTableHandle.getTableName(),
        hiveInsertTableHandle.getInputColumns(),
        hiveInsertTableHandle.getPageSinkMetadata(),
        hiveInsertTableHandle.getLocationHandle(),
        hiveInsertTableHandle.getBucketProperty(),
        hiveInsertTableHandle.getTableStorageFormat(),
        hiveInsertTableHandle.getPartitionStorageFormat(),
        ImmutableMap.of(CarbonTableConfig.CARBON_PRESTO_LOAD_MODEL, jobContext.getConfiguration().get(CarbonTableOutputFormat.LOAD_MODEL)));
}
Aggregations