Use of org.apache.carbondata.processing.sortandgroupby.sortdata.SortTempFileChunkHolder in project carbondata by apache.
The class SingleThreadFinalSortFilesMerger, method getSortedRecordFromFile.
/**
 * This method will be used to get the next sorted record from the temp files
 *
 * @return sorted record
 * @throws CarbonDataWriterException
 */
private Object[] getSortedRecordFromFile() throws CarbonDataWriterException {
  Object[] row = null;
  // poll the top object from the heap
  // the heap maintains a binary tree based on the heap condition, which in turn
  // is based on the comparator we pass to the heap
  // poll() always removes the root of the tree and then performs the
  // trickle-down operation; complexity is log(n)
  SortTempFileChunkHolder poll = this.recordHolderHeapLocal.poll();
  // get the row from the chunk
  row = poll.getRow();
  // check whether the chunk has any entry left
  if (!poll.hasNext()) {
    // if the chunk is empty then close the stream
    poll.closeStream();
    // decrement the file counter
    --this.fileCounter;
    // return row
    return row;
  }
  // read the next row from the chunk
  try {
    poll.readRow();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataWriterException(e.getMessage(), e);
  }
  // add the chunk holder back to the heap
  this.recordHolderHeapLocal.add(poll);
  // return row
  return row;
}
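For illustration, here is a minimal, self-contained sketch of the same poll / re-add merge pattern using a plain java.util.PriorityQueue; the Chunk class and its in-memory data are hypothetical stand-ins for SortTempFileChunkHolder and the sort temp files, not part of CarbonData.

// Minimal sketch of the k-way merge retrieval pattern shown above, assuming
// hypothetical in-memory "chunks" instead of SortTempFileChunkHolder.
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

public class KWayMergeSketch {

  /** Hypothetical stand-in for a sort temp file: a sorted array with a cursor. */
  static final class Chunk {
    private final int[] values;
    private int pos = 0;

    Chunk(int... sortedValues) {
      this.values = sortedValues;
    }

    int current()     { return values[pos]; }
    boolean hasNext() { return pos + 1 < values.length; }
    void readNext()   { pos++; }
  }

  public static void main(String[] args) {
    // Heap ordered by each chunk's current value, mirroring the record holder heap.
    PriorityQueue<Chunk> heap =
        new PriorityQueue<>(Comparator.comparingInt(Chunk::current));
    heap.add(new Chunk(1, 4, 9));
    heap.add(new Chunk(2, 3, 8));
    heap.add(new Chunk(5, 6, 7));

    List<Integer> merged = new ArrayList<>();
    while (!heap.isEmpty()) {
      // Poll the chunk with the smallest current record (log n).
      Chunk top = heap.poll();
      merged.add(top.current());
      if (top.hasNext()) {
        // Advance the cursor and re-add so the heap re-orders itself.
        top.readNext();
        heap.add(top);
      }
      // An exhausted chunk is simply dropped, analogous to closing the stream
      // and decrementing the file counter in getSortedRecordFromFile().
    }
    System.out.println(merged); // [1, 2, 3, 4, 5, 6, 7, 8, 9]
  }
}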
Use of org.apache.carbondata.processing.sortandgroupby.sortdata.SortTempFileChunkHolder in project carbondata by apache.
The class SingleThreadFinalSortFilesMerger, method startSorting.
/**
 * Below method will be used to start the sorting process. This method will get
 * all the temp files present in the sort temp folder, create the
 * record holder heap, then read the first record from each file and
 * initialize the heap
 *
 * @throws CarbonDataWriterException
 */
private void startSorting(File[] files) throws CarbonDataWriterException {
  this.fileCounter = files.length;
  if (fileCounter == 0) {
    LOGGER.info("No files to merge sort");
    return;
  }
  this.fileBufferSize = CarbonDataProcessorUtil.getFileBufferSize(this.fileCounter,
      CarbonProperties.getInstance(), CarbonCommonConstants.CONSTANT_SIZE_TEN);
  LOGGER.info("Number of temp files: " + this.fileCounter);
  LOGGER.info("File Buffer Size: " + this.fileBufferSize);
  // create the record holder heap
  createRecordHolderQueue(files);
  // iterate over the file list, create a chunk holder for each file and add it to the heap
  LOGGER.info("Started adding first record from each file");
  int maxThreadForSorting = 0;
  try {
    maxThreadForSorting = Integer.parseInt(CarbonProperties.getInstance()
        .getProperty(CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD,
            CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD_DEFAULTVALUE));
  } catch (NumberFormatException e) {
    maxThreadForSorting =
        Integer.parseInt(CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD_DEFAULTVALUE);
  }
  ExecutorService service = Executors.newFixedThreadPool(maxThreadForSorting);
  for (final File tempFile : files) {
    Callable<Void> runnable = new Callable<Void>() {
      @Override
      public Void call() throws CarbonSortKeyAndGroupByException {
        // create the chunk holder for this temp file
        SortTempFileChunkHolder sortTempFileChunkHolder =
            new SortTempFileChunkHolder(tempFile, dimensionCount, complexDimensionCount,
                measureCount, fileBufferSize, noDictionaryCount, measureDataType,
                isNoDictionaryColumn, isNoDictionarySortColumn);
        // initialize the holder and read its first row
        sortTempFileChunkHolder.initialize();
        sortTempFileChunkHolder.readRow();
        // add the holder to the shared heap under the lock
        synchronized (LOCKOBJECT) {
          recordHolderHeapLocal.add(sortTempFileChunkHolder);
        }
        return null;
      }
    };
    service.submit(runnable);
  }
  service.shutdown();
  try {
    service.awaitTermination(2, TimeUnit.HOURS);
  } catch (Exception e) {
    throw new CarbonDataWriterException(e.getMessage(), e);
  }
  LOGGER.info("Heap Size: " + this.recordHolderHeapLocal.size());
}
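For illustration, the fan-out initialization above can be sketched in isolation: one Callable per input source, a fixed thread pool, and a synchronized add into a shared heap. The Source class, its values, and the thread count below are hypothetical stand-ins for SortTempFileChunkHolder, the sort temp files, and the carbon.merge.sort.reader.thread setting; they are not CarbonData APIs.

// Minimal sketch of parallel heap initialization, assuming hypothetical sources.
import java.util.PriorityQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ParallelHeapInitSketch {

  /** Hypothetical stand-in for SortTempFileChunkHolder. */
  static final class Source implements Comparable<Source> {
    private final int firstValue;

    Source(int firstValue) {
      this.firstValue = firstValue;
    }

    /** Pretend to open the underlying file and read its first record. */
    int initializeAndReadFirst() {
      return firstValue;
    }

    @Override
    public int compareTo(Source other) {
      return Integer.compare(this.firstValue, other.firstValue);
    }
  }

  private static final Object LOCK = new Object();

  public static void main(String[] args) throws Exception {
    int[] firstValues = {7, 3, 5, 1};
    PriorityQueue<Source> heap = new PriorityQueue<>(firstValues.length);

    int readerThreads = 3; // analogous to the merge sort reader thread property
    ExecutorService service = Executors.newFixedThreadPool(readerThreads);

    for (int v : firstValues) {
      final Source source = new Source(v);
      service.submit((Callable<Void>) () -> {
        // Each task opens one source and reads its first record ...
        source.initializeAndReadFirst();
        // ... then adds it to the shared heap under a lock, because
        // PriorityQueue is not thread-safe.
        synchronized (LOCK) {
          heap.add(source);
        }
        return null;
      });
    }

    service.shutdown();
    service.awaitTermination(1, TimeUnit.MINUTES);
    System.out.println("Heap size: " + heap.size()); // Heap size: 4
  }
}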