Search in sources :

Example 1 with SortIntermediateFileMerger

use of org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger in project carbondata by apache.

the class ParallelReadMergeSorterImpl method initialize.

/**
 * Stores the sort parameters and builds the intermediate and final mergers,
 * then resets the sort temp location so it starts out empty.
 *
 * @param sortParameters configuration used for the mergers and for locating the temp folders
 */
@Override
public void initialize(SortParameters sortParameters) {
    this.sortParameters = sortParameters;
    this.intermediateFileMerger = new SortIntermediateFileMerger(sortParameters);

    // Local store folders for this table / task / segment.
    String[] localStoreDirs = CarbonDataProcessorUtil.getLocalDataFolderLocation(
        sortParameters.getCarbonTable(),
        String.valueOf(sortParameters.getTaskNo()),
        sortParameters.getSegmentId(),
        false,
        false);

    // The final merger reads from the sort temp sub-folder of each store folder.
    String[] sortTempDirs = CarbonDataProcessorUtil.arrayAppend(
        localStoreDirs, File.separator, CarbonCommonConstants.SORT_TEMP_FILE_LOCATION);
    this.finalMerger = new SingleThreadFinalSortFilesMerger(
        sortTempDirs, sortParameters.getTableName(), sortParameters);

    // Remove whatever an earlier run left in the sort temp location ...
    CarbonDataProcessorUtil.deleteSortLocationIfExists(sortParameters.getTempFileLocation());
    // ... and recreate it.
    CarbonDataProcessorUtil.createLocations(sortParameters.getTempFileLocation());
}
Also used : SortIntermediateFileMerger(org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger) SingleThreadFinalSortFilesMerger(org.apache.carbondata.processing.sort.sortdata.SingleThreadFinalSortFilesMerger)

Example 2 with SortIntermediateFileMerger

use of org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger in project carbondata by apache.

the class ParallelReadMergeSorterWithColumnRangeImpl method sort.

/**
 * Sorts the incoming row batches into one sorted stream per column range.
 *
 * <p>For every range a copy of the origin sort parameters is prepared and paired with its own
 * {@link SortIntermediateFileMerger} and {@link SortDataRows}. One {@code SortIteratorThread} per
 * input iterator is then submitted to a fixed thread pool, and the method waits (up to two days)
 * for the pool to terminate before finishing the intermediate mergers.
 *
 * <p>If the wait is interrupted, the calling thread's interrupt status is restored before the
 * failure is reported, so callers further up the stack can still observe the interruption.
 *
 * @param iterators input iterators; one worker is scheduled per element
 * @return one {@code MergedDataIterator} per column range
 * @throws CarbonDataLoadingException if dispatching, waiting, row processing or finishing the
 *         intermediate mergers fails
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
    final int numOfRanges = columnRangeInfo.getNumOfRanges();
    SortDataRows[] sortDataRows = new SortDataRows[numOfRanges];
    intermediateFileMergers = new SortIntermediateFileMerger[numOfRanges];
    SortParameters[] sortParameterArray = new SortParameters[numOfRanges];
    for (int i = 0; i < numOfRanges; i++) {
        // Each range works on its own copy of the parameters, tagged with the range index.
        SortParameters parameters = originSortParameters.getCopy();
        parameters.setPartitionID(i + "");
        parameters.setRangeId(i);
        sortParameterArray[i] = parameters;
        setTempLocation(parameters);
        parameters.setBufferSize(sortBufferSize);
        intermediateFileMergers[i] = new SortIntermediateFileMerger(parameters);
        sortDataRows[i] = new SortDataRows(parameters, intermediateFileMergers[i]);
        sortDataRows[i].initialize();
    }
    ExecutorService executorService = Executors.newFixedThreadPool(iterators.length);
    this.threadStatusObserver = new ThreadStatusObserver(executorService);
    final int batchSize = CarbonProperties.getInstance().getBatchSize();
    try {
        // dispatch rows to sortDataRows by range id
        for (int i = 0; i < iterators.length; i++) {
            executorService.execute(new SortIteratorThread(iterators[i], sortDataRows, rowCounter, this.insideRowCounterList, this.threadStatusObserver));
        }
        executorService.shutdown();
        executorService.awaitTermination(2, TimeUnit.DAYS);
        processRowToNextStep(sortDataRows, originSortParameters);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Catching InterruptedException clears the interrupt flag; set it again so the
            // interruption is not silently lost once the exception is wrapped below.
            Thread.currentThread().interrupt();
        }
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    }
    checkError();
    try {
        for (int i = 0; i < intermediateFileMergers.length; i++) {
            intermediateFileMergers[i].finish();
        }
    } catch (CarbonDataWriterException | CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    Iterator<CarbonRowBatch>[] batchIterator = new Iterator[numOfRanges];
    for (int i = 0; i < numOfRanges; i++) {
        batchIterator[i] = new MergedDataIterator(sortParameterArray[i], batchSize);
    }
    return batchIterator;
}
Also used : CarbonDataLoadingException(org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException) CarbonDataWriterException(org.apache.carbondata.core.datastore.exception.CarbonDataWriterException) SortIntermediateFileMerger(org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException) CarbonDataWriterException(org.apache.carbondata.core.datastore.exception.CarbonDataWriterException) CarbonDataLoadingException(org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException) SortDataRows(org.apache.carbondata.processing.sort.sortdata.SortDataRows) SortParameters(org.apache.carbondata.processing.sort.sortdata.SortParameters) ExecutorService(java.util.concurrent.ExecutorService) CarbonIterator(org.apache.carbondata.common.CarbonIterator) Iterator(java.util.Iterator) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException)

Example 3 with SortIntermediateFileMerger

use of org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger in project carbondata by apache.

the class CompactionResultSortProcessor method initSortDataRows.

/**
 * Fills the per-dimension mapping arrays from the segment's dimensions and
 * creates and initializes the sort data rows instance.
 */
private void initSortDataRows() {
    measureCount = carbonTable.getVisibleMeasures().size();

    // Plain dimensions first, complex dimensions after them.
    dimensions = new ArrayList<>(2);
    dimensions.addAll(segmentProperties.getDimensions());
    dimensions.addAll(segmentProperties.getComplexDimensions());

    final int dimensionTotal = dimensions.size();
    noDictionaryColMapping = new boolean[dimensionTotal];
    sortColumnMapping = new boolean[dimensionTotal];
    isVarcharDimMapping = new boolean[dimensionTotal];

    int position = 0;
    for (CarbonDimension dimension : dimensions) {
        if (dimension.isSortColumn()) {
            sortColumnMapping[position] = true;
        }
        // DATE dimensions are left out of the no-dictionary mapping and count.
        if (dimension.getDataType() != DataTypes.DATE) {
            noDictionaryColMapping[position] = true;
            if (dimension.getColumnSchema().getDataType() == DataTypes.VARCHAR) {
                isVarcharDimMapping[position] = true;
            }
            noDictionaryCount++;
        }
        position++;
    }
    dimensionColumnCount = dimensionTotal;

    sortParameters = createSortParameters();
    intermediateFileMerger = new SortIntermediateFileMerger(sortParameters);

    // Clear out any older files in the sort temp folder, then recreate the folder.
    CarbonDataProcessorUtil.deleteSortLocationIfExists(sortParameters.getTempFileLocation());
    CarbonDataProcessorUtil.createLocations(sortParameters.getTempFileLocation());

    // TODO: Now it is only supported onheap merge, but we can have unsafe merge
    // as well by using UnsafeSortDataRows.
    sortDataRows = new SortDataRows(sortParameters, intermediateFileMerger);
    sortDataRows.initialize();
}
Also used : SortDataRows(org.apache.carbondata.processing.sort.sortdata.SortDataRows) SortIntermediateFileMerger(org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 4 with SortIntermediateFileMerger

use of org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger in project carbondata by apache.

the class SecondaryIndexQueryResultProcessor method initSortDataRows.

/**
 * Fills the per-dimension mapping arrays from the index table's visible dimensions and
 * creates and initializes the sort data rows instance.
 */
private void initSortDataRows() throws SecondaryIndexException {
    measureCount = indexTable.getVisibleMeasures().size();
    implicitColumnCount = indexTable.getImplicitDimensions().size();

    List<CarbonDimension> visibleDimensions = indexTable.getVisibleDimensions();
    final int dimensionTotal = visibleDimensions.size();
    noDictionaryColMapping = new boolean[dimensionTotal];
    sortColumnMapping = new boolean[dimensionTotal];
    isVarcharDimMapping = new boolean[dimensionTotal];

    int position = 0;
    for (CarbonDimension dimension : visibleDimensions) {
        if (dimension.isSortColumn()) {
            sortColumnMapping[position] = true;
        }
        // Dimensions carrying the DICTIONARY encoding are left out of the no-dictionary mapping and count.
        if (!CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DICTIONARY)) {
            noDictionaryColMapping[position] = true;
            if (dimension.getColumnSchema().getDataType() == DataTypes.VARCHAR) {
                isVarcharDimMapping[position] = true;
            }
            noDictionaryCount++;
        }
        position++;
    }
    dimensionColumnCount = dimensionTotal;

    sortParameters = createSortParameters();

    // Start from an empty sort temp location before the merger and sorter are created.
    CarbonDataProcessorUtil.deleteSortLocationIfExists(sortParameters.getTempFileLocation());
    CarbonDataProcessorUtil.createLocations(sortParameters.getTempFileLocation());

    intermediateFileMerger = new SortIntermediateFileMerger(sortParameters);
    sortDataRows = new SortDataRows(sortParameters, intermediateFileMerger);
    sortDataRows.initialize();
}
Also used : SortDataRows(org.apache.carbondata.processing.sort.sortdata.SortDataRows) SortIntermediateFileMerger(org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Aggregations

SortIntermediateFileMerger (org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger)4 SortDataRows (org.apache.carbondata.processing.sort.sortdata.SortDataRows)3 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)2 Iterator (java.util.Iterator)1 ExecutorService (java.util.concurrent.ExecutorService)1 CarbonIterator (org.apache.carbondata.common.CarbonIterator)1 CarbonDataWriterException (org.apache.carbondata.core.datastore.exception.CarbonDataWriterException)1 CarbonDataLoadingException (org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException)1 CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sort.exception.CarbonSortKeyAndGroupByException)1 SingleThreadFinalSortFilesMerger (org.apache.carbondata.processing.sort.sortdata.SingleThreadFinalSortFilesMerger)1 SortParameters (org.apache.carbondata.processing.sort.sortdata.SortParameters)1