Search in sources :

Example 1 with UnsafeCarbonRowPage

use of org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage in project carbondata by apache.

The method sort of the class UnsafeParallelReadMergeSorterImpl.

/**
 * Sorts the rows produced by the given iterators and returns the merged result as a
 * single batch iterator.
 *
 * <p>One {@code SortIteratorThread} is submitted per input iterator to a fixed thread pool
 * sized to {@code iterators.length}. After the pool terminates (waiting at most 2 days),
 * the collected rows are handed to {@code processRowToNextStep}, the intermediate file
 * merger is finished and the final merge is started on its row pages and merged pages.
 *
 * @param iterators input row-batch iterators; one worker task is created for each
 * @return a one-element array holding an iterator that pulls rows from the final merger
 *         and groups them into batches of at most the configured batch size
 * @throws CarbonDataLoadingException if initialization, the parallel sort phase or the
 *         merge phase fails
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
    int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
    UnsafeSortDataRows sortDataRow = new UnsafeSortDataRows(sortParameters, unsafeIntermediateFileMerger, inMemoryChunkSizeInMB);
    final int batchSize = CarbonProperties.getInstance().getBatchSize();
    try {
        sortDataRow.initialize();
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    this.executorService = Executors.newFixedThreadPool(iterators.length);
    this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
    try {
        for (int i = 0; i < iterators.length; i++) {
            executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize, rowCounter, this.threadStatusObserver));
        }
        // No further tasks are accepted; wait for the submitted workers to finish.
        executorService.shutdown();
        executorService.awaitTermination(2, TimeUnit.DAYS);
        processRowToNextStep(sortDataRow, sortParameters);
    } catch (InterruptedException e) {
        // Restore the interrupt status so code further up the stack can still observe
        // that this thread was asked to stop; catching the exception clears the flag.
        Thread.currentThread().interrupt();
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    } catch (Exception e) {
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    }
    checkError();
    try {
        unsafeIntermediateFileMerger.finish();
        List<UnsafeCarbonRowPage> rowPages = unsafeIntermediateFileMerger.getRowPages();
        finalMerger.startFinalMerge(rowPages.toArray(new UnsafeCarbonRowPage[rowPages.size()]), unsafeIntermediateFileMerger.getMergedPages());
    } catch (CarbonDataWriterException e) {
        throw new CarbonDataLoadingException(e);
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    // Creates the iterator to read from merge sorter.
    Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {

        @Override
        public boolean hasNext() {
            return finalMerger.hasNext();
        }

        @Override
        public CarbonRowBatch next() {
            // Fill one batch with up to batchSize rows; the last batch may be smaller.
            int counter = 0;
            CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
            while (finalMerger.hasNext() && counter < batchSize) {
                rowBatch.addRow(new CarbonRow(finalMerger.next()));
                counter++;
            }
            return rowBatch;
        }
    };
    return new Iterator[] { batchIterator };
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow) UnsafeCarbonRowPage(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonIterator(org.apache.carbondata.common.CarbonIterator) CarbonIterator(org.apache.carbondata.common.CarbonIterator) Iterator(java.util.Iterator) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) UnsafeSortDataRows(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeSortDataRows)

Example 2 with UnsafeCarbonRowPage

use of org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage in project carbondata by apache.

The method startSorting of the class UnsafeSingleThreadFinalSortFilesMerger.

/**
 * Starts the sorting process by building the record holder heap from every merge input.
 *
 * <p>The inputs are the given in-memory row pages, the given intermediate mergers and the
 * files returned by {@code getFilesToMergeSort()}. A chunk holder is created for each
 * input, {@code readRow()} is called on it to initialize it, and it is added to
 * {@code recordHolderHeapLocal}. If there are no inputs at all the method logs that and
 * returns without creating the heap.
 *
 * @param rowPages in-memory row pages to merge
 * @param merges intermediate in-memory mergers whose output is to be merged
 * @throws CarbonDataWriterException if any step fails; the original exception is kept
 *         as the cause
 */
private void startSorting(UnsafeCarbonRowPage[] rowPages, List<UnsafeInMemoryIntermediateDataMerger> merges) throws CarbonDataWriterException {
    try {
        File[] filesToMergeSort = getFilesToMergeSort();
        this.fileCounter = rowPages.length + filesToMergeSort.length + merges.size();
        if (fileCounter == 0) {
            LOGGER.info("No files to merge sort");
            return;
        }
        LOGGER.info("Number of row pages: " + this.fileCounter);
        // create record holder heap
        createRecordHolderQueue();
        // iterate over file list and create chunk holder and add to heap
        LOGGER.info("Started adding first record from each page");
        for (final UnsafeCarbonRowPage rowPage : rowPages) {
            SortTempChunkHolder sortTempFileChunkHolder = new UnsafeInmemoryHolder(rowPage, parameters.getDimColCount() + parameters.getComplexDimColCount() + parameters.getMeasureColCount(), parameters.getNumberOfSortColumns());
            // initialize
            sortTempFileChunkHolder.readRow();
            recordHolderHeapLocal.add(sortTempFileChunkHolder);
        }
        for (final UnsafeInMemoryIntermediateDataMerger merger : merges) {
            SortTempChunkHolder sortTempFileChunkHolder = new UnsafeFinalMergePageHolder(merger, parameters.getNoDictionarySortColumn(), parameters.getDimColCount() + parameters.getComplexDimColCount() + parameters.getMeasureColCount());
            // initialize
            sortTempFileChunkHolder.readRow();
            recordHolderHeapLocal.add(sortTempFileChunkHolder);
        }
        for (final File file : filesToMergeSort) {
            SortTempChunkHolder sortTempFileChunkHolder = new UnsafeSortTempFileChunkHolder(file, parameters);
            // initialize
            sortTempFileChunkHolder.readRow();
            recordHolderHeapLocal.add(sortTempFileChunkHolder);
        }
        LOGGER.info("Heap Size" + this.recordHolderHeapLocal.size());
    } catch (Exception e) {
        LOGGER.error(e);
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new CarbonDataWriterException(e.getMessage(), e);
    }
}
Also used : UnsafeFinalMergePageHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeFinalMergePageHolder) SortTempChunkHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.SortTempChunkHolder) UnsafeCarbonRowPage(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage) File(java.io.File) UnsafeInmemoryHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeInmemoryHolder) UnsafeSortTempFileChunkHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeSortTempFileChunkHolder) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)

Example 3 with UnsafeCarbonRowPage

use of org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage in project carbondata by apache.

The method startSorting of the class UnsafeInMemoryIntermediateDataMerger.

/**
 * Prepares the record holder heap for the intermediate in-memory merge.
 *
 * <p>Creates the heap via {@code createRecordHolderQueue}, then wraps every page of
 * {@code unsafeCarbonRowPages} in an {@code UnsafeInmemoryMergeHolder} tagged with a
 * running index, calls {@code readRow()} on the holder to initialize it and adds the
 * holder to {@code recordHolderHeap}.
 *
 * @throws CarbonSortKeyAndGroupByException declared by this method; which call raises it
 *         is not visible here
 */
private void startSorting() throws CarbonSortKeyAndGroupByException {
    LOGGER.info("Number of row pages in intermediate merger: " + this.holderCounter);
    // build the heap that will hold one entry per row page
    createRecordHolderQueue(unsafeCarbonRowPages);
    LOGGER.info("Started adding first record from row page");
    // running index handed to each holder; kept as byte to match the holder constructor
    byte pageIndex = 0;
    for (UnsafeCarbonRowPage page : unsafeCarbonRowPages) {
        // wrap the page in a holder carrying the current index, then advance the index
        UnsafeInmemoryMergeHolder holder = new UnsafeInmemoryMergeHolder(page, pageIndex);
        pageIndex++;
        // initialize the holder
        holder.readRow();
        // register it with the heap
        this.recordHolderHeap.add(holder);
    }
    LOGGER.info("Heap Size" + this.recordHolderHeap.size());
}
Also used : UnsafeCarbonRowPage(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage) UnsafeInmemoryMergeHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeInmemoryMergeHolder)

Aggregations

UnsafeCarbonRowPage (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage)3 CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)2 File (java.io.File)1 Iterator (java.util.Iterator)1 CarbonIterator (org.apache.carbondata.common.CarbonIterator)1 CarbonDataLoadingException (org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException)1 CarbonRow (org.apache.carbondata.processing.newflow.row.CarbonRow)1 CarbonRowBatch (org.apache.carbondata.processing.newflow.row.CarbonRowBatch)1 UnsafeSortDataRows (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeSortDataRows)1 SortTempChunkHolder (org.apache.carbondata.processing.newflow.sort.unsafe.holder.SortTempChunkHolder)1 UnsafeFinalMergePageHolder (org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeFinalMergePageHolder)1 UnsafeInmemoryHolder (org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeInmemoryHolder)1 UnsafeInmemoryMergeHolder (org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeInmemoryMergeHolder)1 UnsafeSortTempFileChunkHolder (org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeSortTempFileChunkHolder)1 CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException)1