Search in sources :

Example 1 with SortParameters

use of org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters in project carbondata by apache.

the class UnsafeParallelReadMergeSorterWithBucketingImpl method sort.

/**
 * Sorts the incoming row batches into one merged, sorted stream per bucket.
 *
 * <p>One {@code UnsafeSortDataRows} / {@code UnsafeIntermediateMerger} pair is created for each
 * bucket reported by {@code bucketingInfo}. One {@code SortIteratorThread} is then submitted per
 * input iterator, and this method blocks (for up to 2 days) until the pool terminates. Finally
 * every intermediate merger is finished and a {@code MergedDataIterator} is returned per bucket.
 *
 * @param iterators input iterators; one worker task is submitted for each of them
 * @return an array with one iterator per bucket, indexed by bucket number
 * @throws CarbonDataLoadingException if sorter initialisation, the wait for the workers, or the
 *         final merge step fails; if the wait is interrupted the calling thread's interrupt
 *         flag is set again before this exception is thrown
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
    UnsafeSortDataRows[] sortDataRows = new UnsafeSortDataRows[bucketingInfo.getNumberOfBuckets()];
    UnsafeIntermediateMerger[] intermediateFileMergers = new UnsafeIntermediateMerger[sortDataRows.length];
    // The configured chunk size is split evenly across the buckets, but never drops below 5.
    int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
    inMemoryChunkSizeInMB = inMemoryChunkSizeInMB / bucketingInfo.getNumberOfBuckets();
    if (inMemoryChunkSizeInMB < 5) {
        inMemoryChunkSizeInMB = 5;
    }
    try {
        for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
            // Every bucket works on its own copy of the parameters, tagged with the bucket index.
            SortParameters parameters = sortParameters.getCopy();
            parameters.setPartitionID(i + "");
            setTempLocation(parameters);
            intermediateFileMergers[i] = new UnsafeIntermediateMerger(parameters);
            sortDataRows[i] = new UnsafeSortDataRows(parameters, intermediateFileMergers[i], inMemoryChunkSizeInMB);
            sortDataRows[i].initialize();
        }
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    this.executorService = Executors.newFixedThreadPool(iterators.length);
    final int batchSize = CarbonProperties.getInstance().getBatchSize();
    try {
        for (int i = 0; i < iterators.length; i++) {
            executorService.submit(new SortIteratorThread(iterators[i], sortDataRows));
        }
        executorService.shutdown();
        executorService.awaitTermination(2, TimeUnit.DAYS);
        processRowToNextStep(sortDataRows, sortParameters);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    } catch (Exception e) {
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    }
    try {
        for (int i = 0; i < intermediateFileMergers.length; i++) {
            intermediateFileMergers[i].finish();
        }
    } catch (Exception e) {
        throw new CarbonDataLoadingException(e);
    }
    Iterator<CarbonRowBatch>[] batchIterator = new Iterator[bucketingInfo.getNumberOfBuckets()];
    for (int i = 0; i < sortDataRows.length; i++) {
        batchIterator[i] = new MergedDataIterator(String.valueOf(i), batchSize, intermediateFileMergers[i]);
    }
    return batchIterator;
}
Also used : CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) UnsafeIntermediateMerger(org.apache.carbondata.processing.newflow.sort.unsafe.merger.UnsafeIntermediateMerger) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) SortParameters(org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters) CarbonIterator(org.apache.carbondata.common.CarbonIterator) Iterator(java.util.Iterator) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) UnsafeSortDataRows(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeSortDataRows)

Example 2 with SortParameters

use of org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters in project carbondata by apache.

the class ParallelReadMergeSorterWithBucketingImpl method sort.

/**
 * Sorts the incoming row batches into one merged, sorted stream per bucket.
 *
 * <p>One {@code SortDataRows} / {@code SortIntermediateFileMerger} pair is created for each
 * bucket reported by {@code bucketingInfo}. One {@code SortIteratorThread} is then submitted per
 * input iterator, and this method blocks (for up to 2 days) until the pool terminates. Finally
 * every intermediate merger is finished and a {@code MergedDataIterator} is returned per bucket.
 *
 * @param iterators input iterators; one worker task is submitted for each of them
 * @return an array with one iterator per bucket, indexed by bucket number
 * @throws CarbonDataLoadingException if sorter initialisation, the wait for the workers, or the
 *         final merge step fails; if the wait is interrupted the calling thread's interrupt
 *         flag is set again before this exception is thrown
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
    SortDataRows[] sortDataRows = new SortDataRows[bucketingInfo.getNumberOfBuckets()];
    intermediateFileMergers = new SortIntermediateFileMerger[sortDataRows.length];
    try {
        for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
            // Every bucket works on its own copy of the parameters, tagged with the bucket index.
            SortParameters parameters = sortParameters.getCopy();
            parameters.setPartitionID(i + "");
            setTempLocation(parameters);
            parameters.setBufferSize(sortBufferSize);
            intermediateFileMergers[i] = new SortIntermediateFileMerger(parameters);
            sortDataRows[i] = new SortDataRows(parameters, intermediateFileMergers[i]);
            sortDataRows[i].initialize();
        }
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    this.executorService = Executors.newFixedThreadPool(iterators.length);
    this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
    final int batchSize = CarbonProperties.getInstance().getBatchSize();
    try {
        for (int i = 0; i < iterators.length; i++) {
            executorService.submit(new SortIteratorThread(iterators[i], sortDataRows, rowCounter, this.threadStatusObserver));
        }
        executorService.shutdown();
        executorService.awaitTermination(2, TimeUnit.DAYS);
        processRowToNextStep(sortDataRows, sortParameters);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    } catch (Exception e) {
        // NOTE(review): checkError() presumably rethrows a failure recorded by a worker thread,
        // taking precedence over the generic wrapper below - confirm against its definition.
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    }
    checkError();
    try {
        for (int i = 0; i < intermediateFileMergers.length; i++) {
            intermediateFileMergers[i].finish();
        }
    } catch (CarbonDataWriterException e) {
        throw new CarbonDataLoadingException(e);
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    Iterator<CarbonRowBatch>[] batchIterator = new Iterator[bucketingInfo.getNumberOfBuckets()];
    for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
        batchIterator[i] = new MergedDataIterator(String.valueOf(i), batchSize);
    }
    return batchIterator;
}
Also used : CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) SortIntermediateFileMerger(org.apache.carbondata.processing.sortandgroupby.sortdata.SortIntermediateFileMerger) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) SortDataRows(org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows) SortParameters(org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters) CarbonIterator(org.apache.carbondata.common.CarbonIterator) Iterator(java.util.Iterator) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException)

Example 3 with SortParameters

use of org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters in project carbondata by apache.

the class SortProcessorStepImpl method initialize.

/**
 * Initialises the child step, then builds the sort parameters from {@code configuration},
 * creates the sorter through {@code SorterFactory} and initialises it with those parameters.
 *
 * @throws IOException declared by the overridden method; may be raised by the calls made here
 */
@Override
public void initialize() throws IOException {
    child.initialize();
    // The parameters are built before the sorter is created, same order as before.
    final SortParameters params = SortParameters.createSortParameters(configuration);
    this.sorter = SorterFactory.createSorter(configuration, rowCounter);
    this.sorter.initialize(params);
}
Also used : SortParameters(org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters)

Example 4 with SortParameters

use of org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters in project carbondata by apache.

the class CompactionResultSortProcessor method initSortDataRows.

/**
 * Creates and initialises the {@code SortDataRows} instance used during compaction.
 *
 * <p>Also records the measure count, the dimension count and, per dimension, whether it lacks
 * {@code Encoding.DICTIONARY} (in {@code noDictionaryColMapping}); {@code noDictionaryCount}
 * is incremented once for each such dimension.
 *
 * @throws Exception if the sort data rows object cannot be initialised; the underlying
 *         {@code CarbonSortKeyAndGroupByException} is attached as the cause
 */
private void initSortDataRows() throws Exception {
    measureCount = carbonTable.getMeasureByTableName(tableName).size();
    List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName);
    noDictionaryColMapping = new boolean[dimensions.size()];
    int i = 0;
    for (CarbonDimension dimension : dimensions) {
        // Entries for dictionary-encoded dimensions keep the array default (false).
        if (!CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DICTIONARY)) {
            noDictionaryColMapping[i] = true;
            noDictionaryCount++;
        }
        i++;
    }
    dimensionColumnCount = dimensions.size();
    SortParameters parameters = createSortParameters();
    intermediateFileMerger = new SortIntermediateFileMerger(parameters);
    // TODO: Now it is only supported onheap merge, but we can have unsafe merge
    // as well by using UnsafeSortDataRows.
    this.sortDataRows = new SortDataRows(parameters, intermediateFileMerger);
    try {
        this.sortDataRows.initialize();
    } catch (CarbonSortKeyAndGroupByException e) {
        LOGGER.error(e);
        // Chain the original exception so its stack trace is not lost to callers.
        throw new Exception("Error initializing sort data rows object during compaction: " + e.getMessage(), e);
    }
}
Also used : SortDataRows(org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows) SortParameters(org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) SortIntermediateFileMerger(org.apache.carbondata.processing.sortandgroupby.sortdata.SortIntermediateFileMerger) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) IOException(java.io.IOException) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Aggregations

SortParameters (org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters)4 CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException)3 Iterator (java.util.Iterator)2 CarbonIterator (org.apache.carbondata.common.CarbonIterator)2 CarbonDataLoadingException (org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException)2 SortDataRows (org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows)2 SortIntermediateFileMerger (org.apache.carbondata.processing.sortandgroupby.sortdata.SortIntermediateFileMerger)2 CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)2 IOException (java.io.IOException)1 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)1 UnsafeSortDataRows (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeSortDataRows)1 UnsafeIntermediateMerger (org.apache.carbondata.processing.newflow.sort.unsafe.merger.UnsafeIntermediateMerger)1