Usage of org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters in the Apache CarbonData project.
Class UnsafeParallelReadMergeSorterWithBucketingImpl, method sort:
/**
 * Sorts the incoming row batches into one sorted stream per bucket.
 *
 * <p>Creates an {@code UnsafeSortDataRows} / {@code UnsafeIntermediateMerger} pair per
 * bucket, drains the input iterators in parallel via {@code SortIteratorThread} tasks,
 * waits for completion, finishes the intermediate merges, and returns one merged
 * iterator per bucket.
 *
 * @param iterators input row batches, one iterator per reader thread
 * @return one merged, sorted {@code CarbonRowBatch} iterator per bucket
 * @throws CarbonDataLoadingException if sort initialization, execution, or merging fails
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators)
    throws CarbonDataLoadingException {
  UnsafeSortDataRows[] sortDataRows = new UnsafeSortDataRows[bucketingInfo.getNumberOfBuckets()];
  UnsafeIntermediateMerger[] intermediateFileMergers =
      new UnsafeIntermediateMerger[sortDataRows.length];
  // Split the configured sort-memory chunk evenly across buckets, with a 5 MB floor
  // so that each bucket still gets a workable in-memory buffer.
  int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
  inMemoryChunkSizeInMB = inMemoryChunkSizeInMB / bucketingInfo.getNumberOfBuckets();
  if (inMemoryChunkSizeInMB < 5) {
    inMemoryChunkSizeInMB = 5;
  }
  try {
    for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
      // Each bucket sorts under its own copy of the parameters, keyed by partition id.
      SortParameters parameters = sortParameters.getCopy();
      parameters.setPartitionID(i + "");
      setTempLocation(parameters);
      intermediateFileMergers[i] = new UnsafeIntermediateMerger(parameters);
      sortDataRows[i] =
          new UnsafeSortDataRows(parameters, intermediateFileMergers[i], inMemoryChunkSizeInMB);
      sortDataRows[i].initialize();
    }
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(new SortIteratorThread(iterators[i], sortDataRows));
    }
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRows, sortParameters);
  } catch (InterruptedException e) {
    // Restore the interrupt status so callers further up can observe the interruption;
    // the previous catch-all silently discarded it.
    Thread.currentThread().interrupt();
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  } catch (Exception e) {
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  }
  try {
    for (UnsafeIntermediateMerger merger : intermediateFileMergers) {
      merger.finish();
    }
  } catch (Exception e) {
    throw new CarbonDataLoadingException(e);
  }
  // Raw Iterator[] is unavoidable: Java forbids creating generic arrays directly.
  Iterator<CarbonRowBatch>[] batchIterator = new Iterator[bucketingInfo.getNumberOfBuckets()];
  for (int i = 0; i < sortDataRows.length; i++) {
    batchIterator[i] =
        new MergedDataIterator(String.valueOf(i), batchSize, intermediateFileMergers[i]);
  }
  return batchIterator;
}
Usage of org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters in the Apache CarbonData project.
Class ParallelReadMergeSorterWithBucketingImpl, method sort:
/**
 * Sorts the incoming row batches into one sorted stream per bucket (on-heap variant).
 *
 * <p>Creates a {@code SortDataRows} / {@code SortIntermediateFileMerger} pair per bucket,
 * drains the input iterators in parallel via {@code SortIteratorThread} tasks while a
 * {@code ThreadStatusObserver} tracks failures, finishes the intermediate merges, and
 * returns one merged iterator per bucket.
 *
 * @param iterators input row batches, one iterator per reader thread
 * @return one merged, sorted {@code CarbonRowBatch} iterator per bucket
 * @throws CarbonDataLoadingException if sort initialization, execution, or merging fails
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators)
    throws CarbonDataLoadingException {
  SortDataRows[] sortDataRows = new SortDataRows[bucketingInfo.getNumberOfBuckets()];
  intermediateFileMergers = new SortIntermediateFileMerger[sortDataRows.length];
  try {
    for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
      // Each bucket sorts under its own copy of the parameters, keyed by partition id.
      SortParameters parameters = sortParameters.getCopy();
      parameters.setPartitionID(i + "");
      setTempLocation(parameters);
      parameters.setBufferSize(sortBufferSize);
      intermediateFileMergers[i] = new SortIntermediateFileMerger(parameters);
      sortDataRows[i] = new SortDataRows(parameters, intermediateFileMergers[i]);
      sortDataRows[i].initialize();
    }
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(
          new SortIteratorThread(iterators[i], sortDataRows, rowCounter, this.threadStatusObserver));
    }
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRows, sortParameters);
  } catch (InterruptedException e) {
    // Restore the interrupt status so callers further up can observe the interruption;
    // the previous catch-all silently discarded it.
    Thread.currentThread().interrupt();
    checkError();
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  } catch (Exception e) {
    // Surface any failure recorded by a worker thread before reporting the local one.
    checkError();
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  }
  checkError();
  try {
    for (SortIntermediateFileMerger merger : intermediateFileMergers) {
      merger.finish();
    }
  } catch (CarbonDataWriterException | CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // Raw Iterator[] is unavoidable: Java forbids creating generic arrays directly.
  Iterator<CarbonRowBatch>[] batchIterator = new Iterator[bucketingInfo.getNumberOfBuckets()];
  for (int i = 0; i < bucketingInfo.getNumberOfBuckets(); i++) {
    batchIterator[i] = new MergedDataIterator(String.valueOf(i), batchSize);
  }
  return batchIterator;
}
Usage of org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters in the Apache CarbonData project.
Class SortProcessorStepImpl, method initialize:
/**
 * Initializes this step: initializes the child step first, then builds the sorter
 * from the load configuration and hands it its sort parameters.
 *
 * @throws IOException if initialization of the child step fails
 */
@Override
public void initialize() throws IOException {
  child.initialize();
  // The sorter implementation is chosen by the factory based on the configuration.
  sorter = SorterFactory.createSorter(configuration, rowCounter);
  sorter.initialize(SortParameters.createSortParameters(configuration));
}
Usage of org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters in the Apache CarbonData project.
Class CompactionResultSortProcessor, method initSortDataRows:
/**
 * Creates and initializes the sort-data-rows instance used during compaction.
 *
 * <p>Derives the measure count, the no-dictionary column mapping, and the dimension
 * count from the table schema, then wires up a {@code SortDataRows} backed by a
 * {@code SortIntermediateFileMerger}.
 *
 * @throws Exception if the sort data rows object cannot be initialized
 */
private void initSortDataRows() throws Exception {
  measureCount = carbonTable.getMeasureByTableName(tableName).size();
  List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(tableName);
  noDictionaryColMapping = new boolean[dimensions.size()];
  int i = 0;
  for (CarbonDimension dimension : dimensions) {
    // Dictionary-encoded dimensions keep the default 'false' in the mapping.
    if (CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DICTIONARY)) {
      i++;
      continue;
    }
    noDictionaryColMapping[i++] = true;
    noDictionaryCount++;
  }
  dimensionColumnCount = dimensions.size();
  SortParameters parameters = createSortParameters();
  intermediateFileMerger = new SortIntermediateFileMerger(parameters);
  // TODO: Now it is only supported onheap merge, but we can have unsafe merge
  // as well by using UnsafeSortDataRows.
  this.sortDataRows = new SortDataRows(parameters, intermediateFileMerger);
  try {
    this.sortDataRows.initialize();
  } catch (CarbonSortKeyAndGroupByException e) {
    LOGGER.error(e);
    // Preserve the original exception as the cause so the full stack trace survives.
    throw new Exception(
        "Error initializing sort data rows object during compaction: " + e.getMessage(), e);
  }
}
Aggregations