Use of io.prestosql.spi.connector.ConnectorSplitSource in project hetu-core by openlookeng.
In the class CarbondataSplitManager, the method getSplitsForCompaction:
/**
 * Converts the splits into batches based on task id and wraps them in a ConnectorSplitSource to send back.
 */
public ConnectorSplitSource getSplitsForCompaction(HiveIdentity identity, ConnectorTransactionHandle transactionHandle, ConnectorTableHandle tableHandle,
        String tablePath, Map<String, Object> queryProperties, String queryId, ImmutableList.Builder<ConnectorSplit> allSplitsForComp,
        Configuration configuration)
        throws PrestoException
{
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveTable.getSchemaTableName();
    List<List<LoadMetadataDetails>> allGroupedSegList;

    // Step 1: Get table handles and metadata
    SemiTransactionalHiveMetastore metaStore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
    Table table = metaStore.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    Properties hiveSchema = MetastoreUtil.getHiveSchema(table);
    CarbonLoadModel carbonLoadModel = null;
    try {
        carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(hiveSchema, configuration);
    }
    catch (Exception e) {
        LOGGER.error("Cannot create carbon load model");
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Cannot create carbon load model");
    }
    CompactionType compactionType = Boolean.TRUE.equals(queryProperties.get("FULL")) ? CompactionType.MAJOR : CompactionType.MINOR;
    // Step 2: Get segments to be merged based on configuration passed
    allGroupedSegList = CarbondataHetuCompactorUtil.identifyAndGroupSegmentsToBeMerged(carbonLoadModel, configuration, compactionType,
            carbondataConfig.getMajorVacuumSegSize(), carbondataConfig.getMinorVacuumSegCount());

    // All the splits are grouped based on taskIds and compaction level into one builder
    ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
    Gson gson = new Gson();
    for (List<LoadMetadataDetails> segmentsToBeMerged : allGroupedSegList) {
        String mergedLoadName = CarbonDataMergerUtil.getMergedLoadName(segmentsToBeMerged);

        // Step 3: Get all the splits for the required segments and divide them based on task ids
        Map<String, List<CarbondataLocalInputSplit>> taskIdToSplitMapping = new HashMap<>();
        for (ConnectorSplit connectorSplit : allSplitsForComp.build()) {
            HiveSplit currSplit = ((HiveSplitWrapper) connectorSplit).getSplits().get(0);
            CarbondataLocalMultiBlockSplit currSplits = gson.fromJson(currSplit.getSchema().getProperty("carbonSplit"), CarbondataLocalMultiBlockSplit.class);
            for (CarbondataLocalInputSplit split : currSplits.getSplitList()) {
                CarbonInputSplit carbonInputSplit = CarbondataLocalInputSplit.convertSplit(split);
                String taskId = carbonInputSplit.taskId;
                String segmentNo = carbonInputSplit.getSegmentId();
                for (LoadMetadataDetails load : segmentsToBeMerged) {
                    if (load.getLoadName().equals(segmentNo)) {
                        List<CarbondataLocalInputSplit> currList = taskIdToSplitMapping.computeIfAbsent(taskId, k -> new ArrayList<>());
                        currList.add(split);
                    }
                }
            }
        }

        // Step 4: Create the ConnectorSplitSource with the splits divided and return
        long index = 0;
        for (Map.Entry<String, List<CarbondataLocalInputSplit>> splitEntry : taskIdToSplitMapping.entrySet()) {
            CarbondataLocalMultiBlockSplit currSplit = new CarbondataLocalMultiBlockSplit(splitEntry.getValue(),
                    splitEntry.getValue().stream().flatMap(f -> Arrays.stream(getLocations(f))).distinct().toArray(String[]::new));
            index++;
            Properties properties = new Properties();
            for (Map.Entry<String, String> entry : table.getStorage().getSerdeParameters().entrySet()) {
                properties.setProperty(entry.getKey(), entry.getValue());
            }
            // TODO: Use the existing CarbondataLocalInputSplit list to convert
            properties.setProperty("tablePath", tablePath);
            properties.setProperty("carbonSplit", currSplit.getJsonString());
            properties.setProperty("queryId", queryId);
            properties.setProperty("index", String.valueOf(index));
            properties.setProperty("mergeLoadName", mergedLoadName);
            properties.setProperty("compactionType", compactionType.toString());
            properties.setProperty("taskNo", splitEntry.getKey());
            cSplits.add(HiveSplitWrapper.wrap(new HiveSplit(schemaTableName.getSchemaName(), schemaTableName.getTableName(), schemaTableName.getTableName(),
                    tablePath, 0L, 0L, 0L, 0L, properties, new ArrayList<>(), getHostAddresses(currSplit.getLocations()), OptionalInt.empty(), false,
                    new HashMap<>(), Optional.empty(), false, Optional.empty(), Optional.empty(), false, ImmutableMap.of())));
        }
    }
    LOGGER.info("Splits for compaction built and ready");
    return new FixedSplitSource(cSplits.build());
}
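The grouped compaction splits are ultimately returned wrapped in a FixedSplitSource, so a caller consumes them through the generic ConnectorSplitSource contract. Below is a minimal sketch of such a consumer, assuming the standard io.prestosql SPI (getNextBatch, isFinished, close, NOT_PARTITIONED); the class name SplitSourceDrainer and the batch size of 1000 are illustrative, not part of hetu-core. The test helpers used later in this section (getAllSplits, getSplitCount) presumably drain their sources the same way.

import static io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

import io.prestosql.spi.connector.ConnectorSplit;
import io.prestosql.spi.connector.ConnectorSplitSource;

import java.util.ArrayList;
import java.util.List;

public final class SplitSourceDrainer
{
    private SplitSourceDrainer() {}

    // Collects every split the source produces, 1000 per batch, then closes the source.
    public static List<ConnectorSplit> drain(ConnectorSplitSource splitSource)
    {
        List<ConnectorSplit> splits = new ArrayList<>();
        while (!splitSource.isFinished()) {
            // getNextBatch returns a CompletableFuture<ConnectorSplitBatch>; join() waits for the batch
            splits.addAll(splitSource.getNextBatch(NOT_PARTITIONED, 1000).join().getSplits());
        }
        splitSource.close();
        return splits;
    }
}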
Use of io.prestosql.spi.connector.ConnectorSplitSource in project hetu-core by openlookeng.
In the class AbstractTestHiveFileSystem, the method testGetRecords:
@Test
public void testGetRecords()
        throws Exception
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();

        ConnectorTableHandle tableHandle = getTableHandle(metadata, this.table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);

        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);
        List<ConnectorSplit> splits = getAllSplits(splitSource);
        assertEquals(splits.size(), 1);

        long sum = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                for (MaterializedRow row : result) {
                    sum += (Long) row.getField(columnIndex.get("t_bigint"));
                }
            }
        }
        // The test table is made up of multiple S3 objects with the same data and different compression codecs:
        // uncompressed | .gz | .lz4 | .bz2
        assertEquals(sum, 78300 * 4);
    }
}
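getAllSplits and indexColumns are helpers of the test class that this snippet does not show. getAllSplits presumably drains the split source as in the SplitSourceDrainer sketch above; indexColumns could look roughly like the following, assuming the hive connector's HiveColumnHandle exposes the column name (the TestColumnIndex wrapper class exists only to keep the sketch self-contained):

import com.google.common.collect.ImmutableMap;

import io.prestosql.plugin.hive.HiveColumnHandle;
import io.prestosql.spi.connector.ColumnHandle;

import java.util.List;
import java.util.Map;

final class TestColumnIndex
{
    private TestColumnIndex() {}

    // Maps each column name to its position in the handle list, so a MaterializedRow
    // field can be looked up by name (e.g. columnIndex.get("t_bigint") above).
    static Map<String, Integer> indexColumns(List<ColumnHandle> columnHandles)
    {
        ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
        int position = 0;
        for (ColumnHandle columnHandle : columnHandles) {
            // Assumes the handles come from the hive connector and can be cast to HiveColumnHandle
            index.put(((HiveColumnHandle) columnHandle).getName(), position);
            position++;
        }
        return index.build();
    }
}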
Use of io.prestosql.spi.connector.ConnectorSplitSource in project hetu-core by openlookeng.
In the class AbstractTestHive, the method testGetPartitionSplitsBatch:
@Test
public void testGetPartitionSplitsBatch()
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);

        assertEquals(getSplitCount(splitSource), tablePartitionFormatPartitions.size());
    }
}
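getSplitCount is another helper the snippet does not show; a plausible sketch, again assuming the standard getNextBatch/isFinished contract, an illustrative batch size, and a wrapper class added only for self-containment:

import static io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

import io.prestosql.spi.connector.ConnectorSplitSource;

final class SplitCounts
{
    private SplitCounts() {}

    // Counts the splits a source produces without keeping them all in memory.
    static int getSplitCount(ConnectorSplitSource splitSource)
    {
        int count = 0;
        while (!splitSource.isFinished()) {
            count += splitSource.getNextBatch(NOT_PARTITIONED, 1000).join().getSplits().size();
        }
        return count;
    }
}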
Use of io.prestosql.spi.connector.ConnectorSplitSource in project hetu-core by openlookeng.
In the class AbstractTestHive, the method testGetPartitionSplitsBatchUnpartitioned:
@Test
public void testGetPartitionSplitsBatchUnpartitioned()
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);

        assertEquals(getSplitCount(splitSource), 1);
    }
}
Use of io.prestosql.spi.connector.ConnectorSplitSource in project boostkit-bigdata by kunpengcompute.
In the class AbstractTestHive, the method testPartitionSchemaNonCanonical:
// TODO coercion of non-canonical values should be supported
@Test(enabled = false)
public void testPartitionSchemaNonCanonical()
        throws Exception
{
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();

        ConnectorTableHandle table = getTableHandle(metadata, tablePartitionSchemaChangeNonCanonical);
        ColumnHandle column = metadata.getColumnHandles(session, table).get("t_boolean");

        Constraint constraint = new Constraint(TupleDomain.fromFixedValues(ImmutableMap.of(column, NullableValue.of(BOOLEAN, false))));
        table = applyFilter(metadata, table, constraint);
        HivePartition partition = getOnlyElement(((HiveTableHandle) table).getPartitions().orElseThrow(AssertionError::new));
        assertEquals(getPartitionId(partition), "t_boolean=0");

        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, table, UNGROUPED_SCHEDULING);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));

        ImmutableList<ColumnHandle> columnHandles = ImmutableList.of(column);
        try (ConnectorPageSource ignored = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, table, columnHandles)) {
            fail("expected exception");
        }
        catch (PrestoException e) {
            assertEquals(e.getErrorCode(), HiveErrorCode.HIVE_INVALID_PARTITION_VALUE.toErrorCode());
        }
    }
}
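The applyFilter call above refers to a protected helper of the test class that is not shown here. A hedged sketch of what it might do, assuming the test class's newSession() factory (used throughout this section) and the standard ConnectorMetadata.applyFilter / ConstraintApplicationResult SPI; the method would live inside the test class next to newSession():

import io.prestosql.spi.connector.ConnectorMetadata;
import io.prestosql.spi.connector.ConnectorTableHandle;
import io.prestosql.spi.connector.Constraint;
import io.prestosql.spi.connector.ConstraintApplicationResult;

// Pushes the constraint into the connector and returns the narrowed table handle,
// failing the test if the connector cannot apply the filter.
protected ConnectorTableHandle applyFilter(ConnectorMetadata metadata, ConnectorTableHandle tableHandle, Constraint constraint)
{
    return metadata.applyFilter(newSession(), tableHandle, constraint)
            .map(ConstraintApplicationResult::getHandle)
            .orElseThrow(AssertionError::new);
}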