Use of io.prestosql.plugin.hive.HiveSplitWrapper in project hetu-core by openlookeng.
From class CarbondataPageSourceProvider, method createPageSource:
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns) {
    HiveSplit carbonSplit = Types.checkType(((HiveSplitWrapper) (split)).getSplits().get(0), HiveSplit.class, "split is not class HiveSplit");
    this.queryId = carbonSplit.getSchema().getProperty("queryId");
    if (this.queryId == null) {
        // Fall back to hive pagesource.
        return super.createPageSource(transactionHandle, session, split, table, columns);
    }
    try {
        hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
    }
    catch (IOException e) {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Failed to get file system: " + e.getMessage());
    }
    return hdfsEnvironment.doAs(session.getUser(), () -> {
        Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase(), carbonSplit.getTable()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
        CarbonTable carbonTable = getCarbonTable(carbonSplit, configuration);
        /* So that CarbonTLS can access it */
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);
        boolean isFullACID = isFullAcidTable(Maps.fromProperties(carbonSplit.getSchema()));
        boolean isDirectVectorFill = (carbonTableReader.config.getPushRowFilter() == null)
                || carbonTableReader.config.getPushRowFilter().equalsIgnoreCase("false")
                || columns.stream().anyMatch(c -> c.getColumnName().equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID));
        return new CarbondataPageSource(carbonTable, queryId, carbonSplit, columns, table, configuration, isDirectVectorFill, isFullACID, session.getUser(), hdfsEnvironment);
    });
}
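The essential pattern here is unwrapping the HiveSplitWrapper to reach the underlying HiveSplit, then reading the schema properties that Carbondata stamped onto the split; when no queryId is present, the provider simply delegates to the Hive page source. A minimal sketch of that unwrap-and-inspect step, using only the accessors shown above (the helper class and method names are hypothetical, not part of hetu-core):

import io.prestosql.plugin.hive.HiveSplit;
import io.prestosql.plugin.hive.HiveSplitWrapper;
import io.prestosql.spi.connector.ConnectorSplit;

public final class SplitUnwrapSketch
{
    private SplitUnwrapSketch() {}

    // Hypothetical helper: returns the Carbondata queryId carried by the split,
    // or null when the split should be handled by the plain Hive page source.
    public static String extractCarbonQueryId(ConnectorSplit split)
    {
        // HiveSplitWrapper bundles one or more HiveSplits; Carbondata keys off the first one.
        HiveSplit hiveSplit = ((HiveSplitWrapper) split).getSplits().get(0);
        return hiveSplit.getSchema().getProperty("queryId");
    }
}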
Use of io.prestosql.plugin.hive.HiveSplitWrapper in project hetu-core by openlookeng.
From class CarbondataSplitManager, method getSplitsForCompaction:
/*
* Convert the splits into batches based on task id and wrap around ConnectorSplitSource to send back
*/
public ConnectorSplitSource getSplitsForCompaction(HiveIdentity identity, ConnectorTransactionHandle transactionHandle, ConnectorTableHandle tableHandle, String tablePath, Map<String, Object> queryProperties, String queryId, ImmutableList.Builder<ConnectorSplit> allSplitsForComp, Configuration configuration) throws PrestoException {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveTable.getSchemaTableName();
    List<List<LoadMetadataDetails>> allGroupedSegList;
    // Step 1: Get table handles and metadata
    SemiTransactionalHiveMetastore metaStore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metaStore.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
    Properties hiveSchema = MetastoreUtil.getHiveSchema(table);
    CarbonLoadModel carbonLoadModel = null;
    try {
        carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(hiveSchema, configuration);
    }
    catch (Exception e) {
        LOGGER.error("Cannot create carbon load model");
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Cannot create carbon load model");
    }
    CompactionType compactionType = (queryProperties.get("FULL") == Boolean.valueOf("true")) ? CompactionType.MAJOR : CompactionType.MINOR;
    // Step 2: Get segments to be merged based on the configuration passed
    allGroupedSegList = CarbondataHetuCompactorUtil.identifyAndGroupSegmentsToBeMerged(carbonLoadModel, configuration, compactionType, carbondataConfig.getMajorVacuumSegSize(), carbondataConfig.getMinorVacuumSegCount());
    // All the splits are grouped based on taskIds and compaction level into one builder
    ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
    Gson gson = new Gson();
    for (List<LoadMetadataDetails> segmentsToBeMerged : allGroupedSegList) {
        String mergedLoadName = CarbonDataMergerUtil.getMergedLoadName(segmentsToBeMerged);
        // Step 3: Get all the splits for the required segments and divide them based on task ids
        Map<String, List<CarbondataLocalInputSplit>> taskIdToSplitMapping = new HashMap<>();
        for (ConnectorSplit connectorSplit : allSplitsForComp.build()) {
            HiveSplit currSplit = ((HiveSplitWrapper) connectorSplit).getSplits().get(0);
            CarbondataLocalMultiBlockSplit currSplits = gson.fromJson(currSplit.getSchema().getProperty("carbonSplit"), CarbondataLocalMultiBlockSplit.class);
            for (CarbondataLocalInputSplit split : currSplits.getSplitList()) {
                CarbonInputSplit carbonInputSplit = CarbondataLocalInputSplit.convertSplit(split);
                String taskId = carbonInputSplit.taskId;
                String segmentNo = carbonInputSplit.getSegmentId();
                for (LoadMetadataDetails load : segmentsToBeMerged) {
                    if (load.getLoadName().equals(segmentNo)) {
                        List<CarbondataLocalInputSplit> currList = taskIdToSplitMapping.computeIfAbsent(taskId, k -> new ArrayList<>());
                        currList.add(split);
                    }
                }
            }
        }
        // Step 4: Create the ConnectorSplitSource with the splits divided and return
        long index = 0;
        for (Map.Entry<String, List<CarbondataLocalInputSplit>> splitEntry : taskIdToSplitMapping.entrySet()) {
            CarbondataLocalMultiBlockSplit currSplit = new CarbondataLocalMultiBlockSplit(splitEntry.getValue(), splitEntry.getValue().stream().flatMap(f -> Arrays.stream(getLocations(f))).distinct().toArray(String[]::new));
            index++;
            Properties properties = new Properties();
            for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
                properties.setProperty(entry.getKey(), entry.getValue());
            }
            // TODO: Use the existing CarbondataLocalInputSplit list to convert
            properties.setProperty("tablePath", tablePath);
            properties.setProperty("carbonSplit", currSplit.getJsonString());
            properties.setProperty("queryId", queryId);
            properties.setProperty("index", String.valueOf(index));
            properties.setProperty("mergeLoadName", mergedLoadName);
            properties.setProperty("compactionType", compactionType.toString());
            properties.setProperty("taskNo", splitEntry.getKey());
            cSplits.add(HiveSplitWrapper.wrap(new HiveSplit(schemaTableName.getSchemaName(), schemaTableName.getTableName(), schemaTableName.getTableName(), tablePath, 0L, 0L, 0L, 0L, properties, new ArrayList<>(), getHostAddresses(currSplit.getLocations()), OptionalInt.empty(), false, new HashMap<>(), Optional.empty(), false, Optional.empty(), Optional.empty(), false, ImmutableMap.of())));
        }
    }
    LOGGER.info("Splits for compaction built and ready");
    return new FixedSplitSource(cSplits.build());
}
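Step 3 above buckets the Carbondata input splits by task id with the computeIfAbsent idiom before Step 4 re-wraps each bucket as a HiveSplit inside a HiveSplitWrapper. A self-contained sketch of just that grouping idiom, with plain strings standing in for CarbondataLocalInputSplit (all names and values here are illustrative only):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class GroupByTaskIdSketch
{
    private GroupByTaskIdSketch() {}

    public static void main(String[] args)
    {
        // Pairs of (taskId, splitName) standing in for CarbonInputSplit.taskId and the split itself.
        String[][] splits = {
                {"task-1", "blocklet-a"},
                {"task-2", "blocklet-b"},
                {"task-1", "blocklet-c"}
        };

        Map<String, List<String>> taskIdToSplitMapping = new HashMap<>();
        for (String[] split : splits) {
            // Same idiom as the compaction code: create the bucket lazily, then append to it.
            taskIdToSplitMapping.computeIfAbsent(split[0], k -> new ArrayList<>()).add(split[1]);
        }

        // Expected content (map iteration order may vary):
        // {task-1=[blocklet-a, blocklet-c], task-2=[blocklet-b]}
        System.out.println(taskIdToSplitMapping);
    }
}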