use of org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage in project carbondata by apache.
the class UnsafeParallelReadMergeSorterImpl method sort.
/**
 * Sorts the rows supplied by the given iterators and returns the merged result as a single
 * iterator of row batches.
 *
 * <p>One {@code SortIteratorThread} is submitted per input iterator, all sharing the same
 * {@code UnsafeSortDataRows}. Once the pool has terminated, the sorted rows are handed to
 * {@code processRowToNextStep}, the intermediate merger is finished and the final merge is
 * started over its row pages and merged pages.
 *
 * @param iterators input row-batch iterators; one worker task is submitted for each
 * @return a one-element array holding the iterator that reads batches from the final merger
 * @throws CarbonDataLoadingException if initialization, the worker tasks, the hand-off to the
 *         next step, or the merge setup fails, or if the calling thread is interrupted while
 *         waiting for the workers
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
  int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
  UnsafeSortDataRows sortDataRow = new UnsafeSortDataRows(sortParameters, unsafeIntermediateFileMerger, inMemoryChunkSizeInMB);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    sortDataRow.initialize();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // NOTE: newFixedThreadPool rejects a pool size of zero, so an empty iterators array fails
  // here with an IllegalArgumentException.
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize, rowCounter, this.threadStatusObserver));
    }
    // No further tasks are accepted; wait for the submitted ones to complete.
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRow, sortParameters);
  } catch (InterruptedException e) {
    // Restore the interrupt status so code further up the stack can still observe the
    // interruption after it has been translated into a loading exception.
    Thread.currentThread().interrupt();
    checkError();
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  } catch (Exception e) {
    checkError();
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  }
  checkError();
  try {
    unsafeIntermediateFileMerger.finish();
    List<UnsafeCarbonRowPage> rowPages = unsafeIntermediateFileMerger.getRowPages();
    finalMerger.startFinalMerge(rowPages.toArray(new UnsafeCarbonRowPage[rowPages.size()]), unsafeIntermediateFileMerger.getMergedPages());
  } catch (CarbonDataWriterException e) {
    throw new CarbonDataLoadingException(e);
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // Creates the iterator to read from merge sorter.
  Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {
    @Override
    public boolean hasNext() {
      return finalMerger.hasNext();
    }

    /**
     * Collects up to {@code batchSize} rows from the final merger into one batch. The batch
     * is empty when the merger has no rows left.
     */
    @Override
    public CarbonRowBatch next() {
      int counter = 0;
      CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
      while (finalMerger.hasNext() && counter < batchSize) {
        rowBatch.addRow(new CarbonRow(finalMerger.next()));
        counter++;
      }
      return rowBatch;
    }
  };
  return new Iterator[] { batchIterator };
}
use of org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage in project carbondata by apache.
the class UnsafeSingleThreadFinalSortFilesMerger method startSorting.
/**
 * Starts the final sorting process by building the record holder heap.
 *
 * <p>A chunk holder is created for every in-memory row page, every in-memory intermediate
 * merger and every file returned by {@code getFilesToMergeSort()}. Each holder reads its
 * first row and is then added to the heap. When there is nothing to merge the method logs
 * that and returns without creating the heap.
 *
 * @param rowPages in-memory row pages that take part in the final merge
 * @param merges in-memory intermediate mergers that take part in the final merge
 * @throws CarbonDataWriterException if any holder cannot be created or initialized; the
 *         original exception is attached as the cause
 */
private void startSorting(UnsafeCarbonRowPage[] rowPages, List<UnsafeInMemoryIntermediateDataMerger> merges) throws CarbonDataWriterException {
  try {
    File[] filesToMergeSort = getFilesToMergeSort();
    // Despite the field name, this counts every merge source: pages, mergers and files.
    this.fileCounter = rowPages.length + filesToMergeSort.length + merges.size();
    if (fileCounter == 0) {
      LOGGER.info("No files to merge sort");
      return;
    }
    LOGGER.info("Number of row pages: " + this.fileCounter);
    // create record holder heap
    createRecordHolderQueue();
    // The total column count is the same for every holder, so compute it once.
    int columnCount = parameters.getDimColCount() + parameters.getComplexDimColCount() + parameters.getMeasureColCount();
    // iterate over each source, create its chunk holder and add it to the heap
    LOGGER.info("Started adding first record from each page");
    for (final UnsafeCarbonRowPage rowPage : rowPages) {
      SortTempChunkHolder sortTempFileChunkHolder = new UnsafeInmemoryHolder(rowPage, columnCount, parameters.getNumberOfSortColumns());
      // initialize
      sortTempFileChunkHolder.readRow();
      recordHolderHeapLocal.add(sortTempFileChunkHolder);
    }
    for (final UnsafeInMemoryIntermediateDataMerger merger : merges) {
      SortTempChunkHolder sortTempFileChunkHolder = new UnsafeFinalMergePageHolder(merger, parameters.getNoDictionarySortColumn(), columnCount);
      // initialize
      sortTempFileChunkHolder.readRow();
      recordHolderHeapLocal.add(sortTempFileChunkHolder);
    }
    for (final File file : filesToMergeSort) {
      SortTempChunkHolder sortTempFileChunkHolder = new UnsafeSortTempFileChunkHolder(file, parameters);
      // initialize
      sortTempFileChunkHolder.readRow();
      recordHolderHeapLocal.add(sortTempFileChunkHolder);
    }
    LOGGER.info("Heap Size" + this.recordHolderHeapLocal.size());
  } catch (Exception e) {
    LOGGER.error(e);
    // Pass the original exception as the cause so its type and stack trace are not lost.
    throw new CarbonDataWriterException(e.getMessage(), e);
  }
}
use of org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage in project carbondata by apache.
the class UnsafeInMemoryIntermediateDataMerger method startSorting.
/**
 * Prepares the record holder heap for the in-memory intermediate merge.
 *
 * <p>The heap is created for the configured row pages, then one merge holder is built per
 * page, primed with its first row and pushed onto the heap. Each holder receives a running
 * index that starts at zero.
 *
 * @throws CarbonSortKeyAndGroupByException
 */
private void startSorting() throws CarbonSortKeyAndGroupByException {
  LOGGER.info("Number of row pages in intermediate merger: " + this.holderCounter);
  // set up the heap that will hold one entry per row page
  createRecordHolderQueue(unsafeCarbonRowPages);
  LOGGER.info("Started adding first record from row page");
  // running position of the page; its type is byte because the holder constructor takes a byte
  byte pageIndex = 0;
  for (UnsafeCarbonRowPage page : unsafeCarbonRowPages) {
    // wrap the page in a holder tagged with its position
    UnsafeInmemoryMergeHolder holder = new UnsafeInmemoryMergeHolder(page, pageIndex);
    pageIndex++;
    // read the first row so the holder can be ordered inside the heap
    holder.readRow();
    this.recordHolderHeap.add(holder);
  }
  LOGGER.info("Heap Size" + this.recordHolderHeap.size());
}
Aggregations