Usage of org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger in the Apache CarbonData project.
Shown below: the initialize method of the class ParallelReadMergeSorterImpl.
/**
 * Prepares this sorter for a load: keeps the given sort parameters, builds the
 * intermediate and final mergers, and resets the sort temp directory.
 *
 * @param sortParameters configuration used for the mergers and for locating
 *                       the sort temp folders
 */
@Override
public void initialize(SortParameters sortParameters) {
  this.sortParameters = sortParameters;
  intermediateFileMerger = new SortIntermediateFileMerger(sortParameters);

  // Resolve the local store folders for this table / task / segment.
  String[] localStoreDirs = CarbonDataProcessorUtil.getLocalDataFolderLocation(
      sortParameters.getCarbonTable(),
      String.valueOf(sortParameters.getTaskNo()),
      sortParameters.getSegmentId(),
      false,
      false);

  // Each store folder gets the sort-temp sub folder appended; the final merger reads from there.
  String[] sortTempDirs = CarbonDataProcessorUtil.arrayAppend(
      localStoreDirs, File.separator, CarbonCommonConstants.SORT_TEMP_FILE_LOCATION);
  finalMerger = new SingleThreadFinalSortFilesMerger(
      sortTempDirs, sortParameters.getTableName(), sortParameters);

  // Start from a clean slate: drop any stale sort temp location, then recreate it.
  CarbonDataProcessorUtil.deleteSortLocationIfExists(sortParameters.getTempFileLocation());
  CarbonDataProcessorUtil.createLocations(sortParameters.getTempFileLocation());
}
Usage of org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger in the Apache CarbonData project.
Shown below: the sort method of the class ParallelReadMergeSorterWithColumnRangeImpl.
/**
 * Sorts the incoming row batches range by range.
 *
 * <p>One {@link SortDataRows} and one {@link SortIntermediateFileMerger} are created per
 * column range, each with its own copy of the original sort parameters. Every input
 * iterator is then consumed by a {@code SortIteratorThread} running on a fixed-size pool
 * (one thread per iterator). After the pool terminates, the intermediate mergers are
 * finished and one {@code MergedDataIterator} per range is returned.
 *
 * @param iterators input row-batch iterators; one worker thread is started for each
 * @return an array with one merged iterator per column range
 * @throws CarbonDataLoadingException if dispatching, waiting for, or finishing the sort fails
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
  final int rangeCount = columnRangeInfo.getNumOfRanges();
  SortDataRows[] sortDataRows = new SortDataRows[rangeCount];
  intermediateFileMergers = new SortIntermediateFileMerger[rangeCount];
  SortParameters[] sortParameterArray = new SortParameters[rangeCount];
  for (int i = 0; i < rangeCount; i++) {
    // Each range works on its own copy so per-range settings do not leak into the original.
    SortParameters parameters = originSortParameters.getCopy();
    parameters.setPartitionID(i + "");
    parameters.setRangeId(i);
    sortParameterArray[i] = parameters;
    setTempLocation(parameters);
    parameters.setBufferSize(sortBufferSize);
    intermediateFileMergers[i] = new SortIntermediateFileMerger(parameters);
    sortDataRows[i] = new SortDataRows(parameters, intermediateFileMergers[i]);
    sortDataRows[i].initialize();
  }
  ExecutorService executorService = Executors.newFixedThreadPool(iterators.length);
  this.threadStatusObserver = new ThreadStatusObserver(executorService);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    // dispatch rows to sortDataRows by range id
    for (int i = 0; i < iterators.length; i++) {
      executorService.execute(new SortIteratorThread(iterators[i], sortDataRows, rowCounter, this.insideRowCounterList, this.threadStatusObserver));
    }
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRows, originSortParameters);
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      // The broad catch would otherwise swallow the interruption; restore the flag so
      // code further up the stack can still observe that this thread was interrupted.
      Thread.currentThread().interrupt();
    }
    // NOTE(review): checkError() presumably rethrows a failure recorded by the worker
    // threads in preference to the wrapper below - confirm against its definition.
    checkError();
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  }
  checkError();
  try {
    for (SortIntermediateFileMerger merger : intermediateFileMergers) {
      merger.finish();
    }
  } catch (CarbonDataWriterException | CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // Generic arrays cannot be created directly; every slot is filled with an
  // Iterator<CarbonRowBatch> right below, so the unchecked conversion is safe.
  @SuppressWarnings("unchecked")
  Iterator<CarbonRowBatch>[] batchIterator = new Iterator[rangeCount];
  for (int i = 0; i < rangeCount; i++) {
    batchIterator[i] = new MergedDataIterator(sortParameterArray[i], batchSize);
  }
  return batchIterator;
}
Usage of org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger in the Apache CarbonData project.
Shown below: the initSortDataRows method of the class CompactionResultSortProcessor.
/**
 * Builds the per-dimension mapping arrays, creates the sort parameters and
 * sets up the {@link SortDataRows} instance together with its intermediate
 * file merger and a freshly created sort temp directory.
 */
private void initSortDataRows() {
  measureCount = carbonTable.getVisibleMeasures().size();

  // Regular dimensions first, complex dimensions after them.
  dimensions = new ArrayList<>(2);
  dimensions.addAll(segmentProperties.getDimensions());
  dimensions.addAll(segmentProperties.getComplexDimensions());

  final int dimensionTotal = dimensions.size();
  noDictionaryColMapping = new boolean[dimensionTotal];
  sortColumnMapping = new boolean[dimensionTotal];
  isVarcharDimMapping = new boolean[dimensionTotal];
  dimensionColumnCount = dimensionTotal;

  int position = 0;
  for (CarbonDimension dimension : dimensions) {
    if (dimension.isSortColumn()) {
      sortColumnMapping[position] = true;
    }
    // DATE dimensions are left out of the no-dictionary mapping and count.
    if (dimension.getDataType() != DataTypes.DATE) {
      noDictionaryColMapping[position] = true;
      if (dimension.getColumnSchema().getDataType() == DataTypes.VARCHAR) {
        isVarcharDimMapping[position] = true;
      }
      noDictionaryCount++;
    }
    position++;
  }

  sortParameters = createSortParameters();
  intermediateFileMerger = new SortIntermediateFileMerger(sortParameters);

  // Remove whatever an earlier run left in the sort temp folder, then recreate it.
  CarbonDataProcessorUtil.deleteSortLocationIfExists(sortParameters.getTempFileLocation());
  CarbonDataProcessorUtil.createLocations(sortParameters.getTempFileLocation());

  // TODO: only the on-heap merge is supported for now; an unsafe merge could be
  // added by using UnsafeSortDataRows.
  this.sortDataRows = new SortDataRows(sortParameters, intermediateFileMerger);
  this.sortDataRows.initialize();
}
Usage of org.apache.carbondata.processing.sort.sortdata.SortIntermediateFileMerger in the Apache CarbonData project.
Shown below: the initSortDataRows method of the class SecondaryIndexQueryResultProcessor.
/**
 * Builds the per-dimension mapping arrays for the index table, creates the
 * sort parameters, resets the sort temp directory and sets up the
 * {@link SortDataRows} instance with its intermediate file merger.
 *
 * @throws SecondaryIndexException declared by the method signature
 */
private void initSortDataRows() throws SecondaryIndexException {
  measureCount = indexTable.getVisibleMeasures().size();
  implicitColumnCount = indexTable.getImplicitDimensions().size();

  List<CarbonDimension> dimensions = indexTable.getVisibleDimensions();
  final int dimensionTotal = dimensions.size();
  noDictionaryColMapping = new boolean[dimensionTotal];
  sortColumnMapping = new boolean[dimensionTotal];
  isVarcharDimMapping = new boolean[dimensionTotal];

  int position = 0;
  for (CarbonDimension dimension : dimensions) {
    if (dimension.isSortColumn()) {
      sortColumnMapping[position] = true;
    }
    // Dictionary-encoded dimensions are left out of the no-dictionary mapping and count.
    if (!CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DICTIONARY)) {
      noDictionaryColMapping[position] = true;
      if (dimension.getColumnSchema().getDataType() == DataTypes.VARCHAR) {
        isVarcharDimMapping[position] = true;
      }
      noDictionaryCount++;
    }
    position++;
  }
  dimensionColumnCount = dimensionTotal;

  sortParameters = createSortParameters();

  // Clear out any stale sort temp location before recreating it.
  CarbonDataProcessorUtil.deleteSortLocationIfExists(sortParameters.getTempFileLocation());
  CarbonDataProcessorUtil.createLocations(sortParameters.getTempFileLocation());

  intermediateFileMerger = new SortIntermediateFileMerger(sortParameters);
  this.sortDataRows = new SortDataRows(sortParameters, intermediateFileMerger);
  this.sortDataRows.initialize();
}
Aggregations