Search in sources :

Example 1 with SortTempChunkHolder

use of org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder in project carbondata by apache.

the class UnsafeIntermediateFileMerger method startSorting.

/**
 * Starts the sorting (merge) process for the intermediate temp files.
 *
 * <p>The record holder heap is created for {@code intermediateFiles}; then, for
 * every file, a chunk holder is built, its first row is read, its row count is
 * added to {@code totalNumberOfRecords}, and the holder is pushed onto the heap.
 *
 * @throws CarbonSortKeyAndGroupByException propagated unchanged from the calls
 *         made while the holders are being created and initialized
 */
private void startSorting() throws CarbonSortKeyAndGroupByException {
    LOGGER.info("Number of temp file: " + fileCounter);
    // the heap has to exist before any holder can be queued
    createRecordHolderQueue(intermediateFiles);
    LOGGER.info("Started adding first record from each file");
    for (File tempFile : intermediateFiles) {
        // one holder per temp file, positioned on its first row
        final SortTempChunkHolder holder =
            new UnsafeSortTempFileChunkHolder(tempFile, mergerParameters);
        holder.readRow();
        totalNumberOfRecords += holder.numberOfRows();
        // queue the initialized holder
        recordHolderHeap.add(holder);
    }
    LOGGER.info("Heap Size: " + recordHolderHeap.size());
}
Also used : SortTempChunkHolder(org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder) File(java.io.File) UnsafeSortTempFileChunkHolder(org.apache.carbondata.processing.loading.sort.unsafe.holder.UnsafeSortTempFileChunkHolder)

Example 2 with SortTempChunkHolder

use of org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder in project carbondata by apache.

the class UnsafeIntermediateFileMerger method getSortedRecordFromFile.

/**
 * Returns the next sorted row taken from the sort temp files.
 *
 * <p>The head holder is polled from {@code recordHolderHeap} and its current
 * row becomes the result. If the holder has further rows it is advanced and
 * put back on the heap; otherwise it is closed and {@code fileCounter} is
 * decremented.
 *
 * @return the row held by the holder at the head of the heap
 * @throws CarbonSortKeyAndGroupByException propagated unchanged from the
 *         holder operations
 */
private IntermediateSortTempRow getSortedRecordFromFile() throws CarbonSortKeyAndGroupByException {
    // the heap orders holders by its comparator, so the head carries the next row
    final SortTempChunkHolder head = recordHolderHeap.poll();
    final IntermediateSortTempRow current = head.getRow();
    if (head.hasNext()) {
        // more rows remain: advance the holder and re-queue it
        head.readRow();
        recordHolderHeap.add(head);
    } else {
        // holder is exhausted: release it and count one source less
        head.close();
        fileCounter--;
    }
    return current;
}
Also used : IntermediateSortTempRow(org.apache.carbondata.processing.loading.row.IntermediateSortTempRow) SortTempChunkHolder(org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder)

Example 3 with SortTempChunkHolder

use of org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder in project carbondata by apache.

the class UnsafeSingleThreadFinalSortFilesMerger method startSorting.

/**
 * Starts the final sorting (merge) process.
 *
 * <p>{@code fileCounter} is set to the combined number of row pages, files to
 * merge-sort and in-memory mergers. When that total is zero the method only
 * logs and returns. Otherwise the record holder heap is created and one chunk
 * holder per source is built, advanced to its first row and added to
 * {@code recordHolderHeapLocal} - row pages first, then mergers, then files.
 *
 * @param rowPages in-memory row pages to include in the merge
 * @param merges   in-memory intermediate mergers to include in the merge
 * @throws CarbonDataWriterException wrapping any exception raised during setup;
 *         the exception is logged before being wrapped
 */
private void startSorting(UnsafeCarbonRowPage[] rowPages, List<UnsafeInMemoryIntermediateDataMerger> merges) throws CarbonDataWriterException {
    try {
        List<File> sortFiles = getFilesToMergeSort();
        fileCounter = rowPages.length + sortFiles.size() + merges.size();
        if (fileCounter == 0) {
            LOGGER.info("No files to merge sort");
            return;
        }
        LOGGER.info("Starting final merger");
        LOGGER.info("Number of row pages: " + fileCounter);
        // the heap has to exist before any holder can be queued
        createRecordHolderQueue();
        LOGGER.info("Started adding first record from each page");
        // 1) in-memory row pages
        for (UnsafeCarbonRowPage page : rowPages) {
            SortTempChunkHolder pageHolder = new UnsafeInmemoryHolder(page);
            pageHolder.readRow();
            recordHolderHeapLocal.add(pageHolder);
        }
        // 2) in-memory intermediate mergers
        for (UnsafeInMemoryIntermediateDataMerger intermediate : merges) {
            SortTempChunkHolder mergeHolder =
                new UnsafeFinalMergePageHolder(intermediate, parameters.getNoDictionarySortColumn());
            mergeHolder.readRow();
            recordHolderHeapLocal.add(mergeHolder);
        }
        // 3) sort temp files
        for (File sortFile : sortFiles) {
            SortTempChunkHolder fileHolder = new UnsafeSortTempFileChunkHolder(sortFile, parameters);
            fileHolder.readRow();
            recordHolderHeapLocal.add(fileHolder);
        }
        LOGGER.info("Heap Size: " + recordHolderHeapLocal.size());
    } catch (Exception e) {
        LOGGER.error(e);
        throw new CarbonDataWriterException(e);
    }
}
Also used : UnsafeFinalMergePageHolder(org.apache.carbondata.processing.loading.sort.unsafe.holder.UnsafeFinalMergePageHolder) SortTempChunkHolder(org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder) UnsafeCarbonRowPage(org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage) File(java.io.File) UnsafeInmemoryHolder(org.apache.carbondata.processing.loading.sort.unsafe.holder.UnsafeInmemoryHolder) UnsafeSortTempFileChunkHolder(org.apache.carbondata.processing.loading.sort.unsafe.holder.UnsafeSortTempFileChunkHolder) CarbonDataWriterException(org.apache.carbondata.core.datastore.exception.CarbonDataWriterException)

Example 4 with SortTempChunkHolder

use of org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder in project carbondata by apache.

the class UnsafeSingleThreadFinalSortFilesMerger method getSortedRecordFromFile.

/**
 * Returns the next sorted row from the merge sources.
 *
 * <p>The head holder is polled from {@code recordHolderHeapLocal} and its
 * current row becomes the result. If the holder has further rows it is
 * advanced and put back on the heap; otherwise it is closed and
 * {@code fileCounter} is decremented.
 *
 * @return the row held by the holder at the head of the heap
 * @throws CarbonDataWriterException if advancing the holder to its next row
 *         fails; the original exception is kept as the cause
 */
private IntermediateSortTempRow getSortedRecordFromFile() throws CarbonDataWriterException {
    // the heap orders holders by its comparator, so the head carries the next row
    final SortTempChunkHolder head = recordHolderHeapLocal.poll();
    final IntermediateSortTempRow current = head.getRow();
    if (head.hasNext()) {
        // more rows remain: advance the holder, translating any failure
        try {
            head.readRow();
        } catch (Exception e) {
            throw new CarbonDataWriterException(e.getMessage(), e);
        }
        // re-queue the advanced holder
        recordHolderHeapLocal.add(head);
    } else {
        // holder is exhausted: release it and count one source less
        head.close();
        fileCounter--;
    }
    return current;
}
Also used : IntermediateSortTempRow(org.apache.carbondata.processing.loading.row.IntermediateSortTempRow) SortTempChunkHolder(org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder) CarbonDataWriterException(org.apache.carbondata.core.datastore.exception.CarbonDataWriterException)

Aggregations

SortTempChunkHolder (org.apache.carbondata.processing.loading.sort.unsafe.holder.SortTempChunkHolder): 4; File (java.io.File): 2; CarbonDataWriterException (org.apache.carbondata.core.datastore.exception.CarbonDataWriterException): 2; IntermediateSortTempRow (org.apache.carbondata.processing.loading.row.IntermediateSortTempRow): 2; UnsafeSortTempFileChunkHolder (org.apache.carbondata.processing.loading.sort.unsafe.holder.UnsafeSortTempFileChunkHolder): 2; UnsafeCarbonRowPage (org.apache.carbondata.processing.loading.sort.unsafe.UnsafeCarbonRowPage): 1; UnsafeFinalMergePageHolder (org.apache.carbondata.processing.loading.sort.unsafe.holder.UnsafeFinalMergePageHolder): 1; UnsafeInmemoryHolder (org.apache.carbondata.processing.loading.sort.unsafe.holder.UnsafeInmemoryHolder): 1