use of org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder in project carbondata by apache.
the class UnsafeIntermediateFileMerger method startSorting.
/**
 * Starts the sorting process by seeding the record holder heap: one chunk holder
 * is created per intermediate sort temp file, its first row is read, its row count
 * is added to {@code totalNumberOfRecords}, and the holder is queued on the heap.
 *
 * <p>A holder that fails while being initialised is closed before the failure
 * propagates. At that point it has not been added to the heap, so this method
 * holds the only reference through which it could still be released.
 *
 * @throws CarbonSortKeyAndGroupByException propagated from the heap set-up or from
 *                                          initialising a chunk holder
 */
private void startSorting() throws CarbonSortKeyAndGroupByException {
  LOGGER.info("Number of temp file: " + this.fileCounter);
  // create record holder heap
  createRecordHolderQueue(intermediateFiles);
  // iterate over file list and create chunk holder and add to heap
  LOGGER.info("Started adding first record from each file");
  for (File tempFile : intermediateFiles) {
    // create chunk holder (scoped to the iteration; nothing outside the loop needs it)
    SortTempChunkHolder chunkHolder = new UnsafeSortTempFileChunkHolder(tempFile, mergerParameters);
    boolean queued = false;
    try {
      // position the holder on its first row before it takes part in heap ordering
      chunkHolder.readRow();
      this.totalNumberOfRecords += chunkHolder.numberOfRows();
      // add to heap
      this.recordHolderHeap.add(chunkHolder);
      queued = true;
    } finally {
      if (!queued) {
        // initialisation failed: release the holder here, it never reached the heap
        chunkHolder.close();
      }
    }
  }
  LOGGER.info("Heap Size: " + this.recordHolderHeap.size());
}
use of org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder in project carbondata by apache.
the class UnsafeIntermediateFileMerger method getSortedRecordFromFile.
/**
 * Takes the chunk holder currently at the top of the record holder heap and
 * returns the row it is positioned on.
 *
 * <p>If the holder still has rows, it is advanced to its next row and put back on
 * the heap so it is re-ordered against the other holders. Otherwise it is closed
 * and {@code fileCounter} is decremented.
 *
 * @return the row the polled holder was positioned on
 * @throws CarbonSortKeyAndGroupByException propagated from advancing the holder
 */
private IntermediateSortTempRow getSortedRecordFromFile() throws CarbonSortKeyAndGroupByException {
  // polling removes the root of the heap, as ordered by the heap's comparator
  final SortTempChunkHolder head = this.recordHolderHeap.poll();
  // capture the current row before the holder is advanced or closed
  final IntermediateSortTempRow current = head.getRow();
  if (head.hasNext()) {
    // more rows left: move to the next one and re-queue the holder
    head.readRow();
    this.recordHolderHeap.add(head);
  } else {
    // holder is exhausted: release it and account for one source less
    head.close();
    --this.fileCounter;
  }
  return current;
}
use of org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder in project carbondata by apache.
the class UnsafeSingleThreadFinalSortFilesMerger method startSorting.
/**
 * Starts the final merge by seeding the local record holder heap with one chunk
 * holder per source: every in-memory row page, every in-memory intermediate
 * merger and every sort temp file returned by {@code getFilesToMergeSort()}.
 * Each holder has its first row read before it is queued.
 *
 * <p>{@code fileCounter} is set to the combined number of sources; when it is
 * zero the method returns without creating the heap.
 *
 * <p>If a holder fails while reading its first row it is closed before the
 * failure is rethrown, because it has not been added to the heap yet and no
 * other reference to it exists.
 *
 * @param rowPages in-memory row pages to merge
 * @param merges   in-memory intermediate mergers to merge
 * @throws CarbonDataWriterException wrapping any exception raised while seeding the heap
 */
private void startSorting(UnsafeCarbonRowPage[] rowPages, List<UnsafeInMemoryIntermediateDataMerger> merges) throws CarbonDataWriterException {
  // holder that has been created but not yet handed over to the heap
  SortTempChunkHolder pending = null;
  try {
    List<File> filesToMergeSort = getFilesToMergeSort();
    this.fileCounter = rowPages.length + filesToMergeSort.size() + merges.size();
    if (fileCounter == 0) {
      LOGGER.info("No files to merge sort");
      return;
    }
    LOGGER.info("Starting final merger");
    // note: the value logged here is the combined count of pages, mergers and files
    LOGGER.info("Number of row pages: " + this.fileCounter);
    // create record holder heap
    createRecordHolderQueue();
    // iterate over every source, create its chunk holder and add it to the heap
    LOGGER.info("Started adding first record from each page");
    for (final UnsafeCarbonRowPage rowPage : rowPages) {
      pending = new UnsafeInmemoryHolder(rowPage);
      // initialize
      pending.readRow();
      recordHolderHeapLocal.add(pending);
      pending = null;
    }
    for (final UnsafeInMemoryIntermediateDataMerger merger : merges) {
      pending = new UnsafeFinalMergePageHolder(merger, parameters.getNoDictionarySortColumn());
      // initialize
      pending.readRow();
      recordHolderHeapLocal.add(pending);
      pending = null;
    }
    for (final File file : filesToMergeSort) {
      pending = new UnsafeSortTempFileChunkHolder(file, parameters);
      // initialize
      pending.readRow();
      recordHolderHeapLocal.add(pending);
      pending = null;
    }
    LOGGER.info("Heap Size: " + this.recordHolderHeapLocal.size());
  } catch (Exception e) {
    if (pending != null) {
      // the holder never reached the heap, so release it here
      try {
        pending.close();
      } catch (Exception closeFailure) {
        // keep the original failure as the primary error
        e.addSuppressed(closeFailure);
      }
    }
    LOGGER.error(e);
    throw new CarbonDataWriterException(e);
  }
}
use of org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder in project carbondata by apache.
the class UnsafeSingleThreadFinalSortFilesMerger method getSortedRecordFromFile.
/**
 * Takes the chunk holder currently at the top of the local record holder heap and
 * returns the row it is positioned on.
 *
 * <p>If the holder has no further rows it is closed and {@code fileCounter} is
 * decremented. Otherwise it is advanced to its next row and put back on the heap.
 * If advancing fails, the holder is closed before the failure is rethrown: it has
 * already been removed from the heap, so it would otherwise be dropped unreleased.
 *
 * @return the row the polled holder was positioned on
 * @throws CarbonDataWriterException wrapping any exception raised while reading the next row
 */
private IntermediateSortTempRow getSortedRecordFromFile() throws CarbonDataWriterException {
  // polling removes the root of the heap, as ordered by the heap's comparator
  SortTempChunkHolder poll = this.recordHolderHeapLocal.poll();
  // get the row from chunk
  IntermediateSortTempRow row = poll.getRow();
  // check if there is no further entry present
  if (!poll.hasNext()) {
    // if chunk is empty then close the stream
    poll.close();
    // change the file counter
    --this.fileCounter;
    // return row
    return row;
  }
  // read new row
  try {
    poll.readRow();
  } catch (Exception e) {
    // the holder is no longer in the heap; release it here before failing
    try {
      poll.close();
    } catch (Exception closeFailure) {
      // keep the read failure as the primary error
      e.addSuppressed(closeFailure);
    }
    throw new CarbonDataWriterException(e.getMessage(), e);
  }
  // add to heap
  this.recordHolderHeapLocal.add(poll);
  // return row
  return row;
}
Aggregations