Use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in the Apache CarbonData project.
Class UnsafeParallelReadMergeSorterImpl, method sort.
/**
 * Sorts the rows supplied by the given iterators and exposes the merged result as batches.
 *
 * <p>One {@code SortIteratorThread} per input iterator is submitted to a fixed thread pool of
 * {@code iterators.length} threads; all tasks share a single {@code UnsafeSortDataRows}
 * instance. Once the pool has terminated, the intermediate merger is finished and the final
 * merger is started on its row pages and merged pages.
 *
 * @param iterators input iterators; one sort task is submitted for each element
 * @return a single-element array whose iterator groups the final merger's output into batches
 *         of at most the configured batch size
 * @throws CarbonDataLoadingException if initialisation, sorting or merging fails, or if the
 *         calling thread is interrupted while waiting for the sort tasks
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
  int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
  UnsafeSortDataRows sortDataRow =
      new UnsafeSortDataRows(sortParameters, unsafeIntermediateFileMerger, inMemoryChunkSizeInMB);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    sortDataRow.initialize();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize, rowCounter, this.threadStatusObserver));
    }
    // No further tasks are accepted; wait for the submitted sort tasks to complete.
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRow, sortParameters);
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      // Catching InterruptedException clears the interrupt status; restore it so that code
      // further up the stack can still observe that this thread was interrupted.
      Thread.currentThread().interrupt();
    }
    checkError();
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  }
  checkError();
  try {
    unsafeIntermediateFileMerger.finish();
    List<UnsafeCarbonRowPage> rowPages = unsafeIntermediateFileMerger.getRowPages();
    finalMerger.startFinalMerge(rowPages.toArray(new UnsafeCarbonRowPage[rowPages.size()]), unsafeIntermediateFileMerger.getMergedPages());
  } catch (CarbonDataWriterException e) {
    throw new CarbonDataLoadingException(e);
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // Creates the iterator to read from merge sorter.
  Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {
    @Override
    public boolean hasNext() {
      return finalMerger.hasNext();
    }

    @Override
    public CarbonRowBatch next() {
      // Pull rows from the final merger until the batch is full or the merger is exhausted.
      int counter = 0;
      CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
      while (finalMerger.hasNext() && counter < batchSize) {
        rowBatch.addRow(new CarbonRow(finalMerger.next()));
        counter++;
      }
      return rowBatch;
    }
  };
  return new Iterator[] { batchIterator };
}
Use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in the Apache CarbonData project.
Class ParallelReadMergeSorterImpl, method sort.
/**
 * Sorts the rows supplied by the given iterators and exposes the merged result as batches.
 *
 * <p>One {@code SortIteratorThread} per input iterator is submitted to a fixed thread pool of
 * {@code iterators.length} threads; all tasks share a single {@code SortDataRows} instance.
 * Once the pool has terminated, the intermediate merger is finished and the final merge is
 * started.
 *
 * @param iterators input iterators; one sort task is submitted for each element
 * @return a single-element array whose iterator groups the final merger's output into batches
 *         of at most the configured batch size
 * @throws CarbonDataLoadingException if initialisation, sorting or merging fails, or if the
 *         calling thread is interrupted while waiting for the sort tasks
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
  SortDataRows sortDataRow = new SortDataRows(sortParameters, intermediateFileMerger);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    sortDataRow.initialize();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  this.threadStatusObserver = new ThreadStatusObserver(executorService);
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize, rowCounter, threadStatusObserver));
    }
    // No further tasks are accepted; wait for the submitted sort tasks to complete.
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRow, sortParameters);
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      // Catching InterruptedException clears the interrupt status; restore it so that code
      // further up the stack can still observe that this thread was interrupted.
      Thread.currentThread().interrupt();
    }
    checkError();
    throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
  }
  checkError();
  try {
    intermediateFileMerger.finish();
    // The field is cleared once the merger has finished.
    // NOTE(review): presumably to release the merger for garbage collection - confirm.
    intermediateFileMerger = null;
    finalMerger.startFinalMerge();
  } catch (CarbonDataWriterException e) {
    throw new CarbonDataLoadingException(e);
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // Creates the iterator to read from merge sorter.
  Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {
    @Override
    public boolean hasNext() {
      return finalMerger.hasNext();
    }

    @Override
    public CarbonRowBatch next() {
      // Pull rows from the final merger until the batch is full or the merger is exhausted.
      int counter = 0;
      CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
      while (finalMerger.hasNext() && counter < batchSize) {
        rowBatch.addRow(new CarbonRow(finalMerger.next()));
        counter++;
      }
      return rowBatch;
    }
  };
  return new Iterator[] { batchIterator };
}
Use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in the Apache CarbonData project.
Class DataConverterProcessorStepImpl, method processRowBatch.
/**
 * Runs every row of the incoming batch through the converter and gathers the results in a
 * newly created batch. The step's {@code rowCounter} is advanced by the size of that new batch.
 *
 * @param rowBatch batch whose rows are read via {@code hasNext()} / {@code next()}
 * @param localConverter converter applied to each row
 * @return the new batch holding the converted rows in the order they were read
 */
protected CarbonRowBatch processRowBatch(CarbonRowBatch rowBatch, RowConverter localConverter) {
  // The result batch is created with the size reported by the incoming batch.
  final CarbonRowBatch convertedBatch = new CarbonRowBatch(rowBatch.getSize());
  for (; rowBatch.hasNext(); ) {
    convertedBatch.addRow(
        localConverter.convert(rowBatch.next()));
  }
  rowCounter.getAndAdd(convertedBatch.getSize());
  return convertedBatch;
}
Use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in the Apache CarbonData project.
Class DataConverterProcessorWithBucketingStepImpl, method processRowBatch.
/**
 * Converts every row of the incoming batch, tags each converted row with a bucket number and
 * gathers the results in a newly created batch. The step's {@code rowCounter} is advanced by
 * the size of that new batch.
 *
 * @param rowBatch batch whose rows are read via {@code hasNext()} / {@code next()}
 * @param localConverter converter applied to each row
 * @return the new batch holding the converted, bucket-tagged rows in the order they were read
 */
protected CarbonRowBatch processRowBatch(CarbonRowBatch rowBatch, RowConverter localConverter) {
  final CarbonRowBatch convertedBatch = new CarbonRowBatch(rowBatch.getSize());
  for (; rowBatch.hasNext(); ) {
    final CarbonRow sourceRow = rowBatch.next();
    // The bucket is computed from the row's data before the row is handed to the converter.
    final short bucket = (short) partitioner.getPartition(sourceRow.getData());
    final CarbonRow convertedRow = localConverter.convert(sourceRow);
    convertedRow.bucketNumber = bucket;
    convertedBatch.addRow(convertedRow);
  }
  rowCounter.getAndAdd(convertedBatch.getSize());
  return convertedBatch;
}
Use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in the Apache CarbonData project.
Class DataWriterBatchProcessorStepImpl, method execute.
/**
 * Executes the child step and writes every batch it produces through its own fact handler.
 *
 * <p>For each iterator returned by the child (index {@code i}) a store location is resolved
 * from the table identifier and {@code i}. For each batch of that iterator (index {@code k}) a
 * handler model is created from the configuration, store location, {@code i} and {@code k}; a
 * columnar fact handler is then created from it, initialised, fed the batch via
 * {@code processBatch} and completed via {@code finish}.
 *
 * @return always {@code null}
 * @throws CarbonDataLoadingException if any failure occurs while writing; the original
 *         exception is attached as the cause
 */
@Override
public Iterator<CarbonRowBatch>[] execute() throws CarbonDataLoadingException {
  Iterator<CarbonRowBatch>[] iterators = child.execute();
  CarbonTableIdentifier tableIdentifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
  String tableName = tableIdentifier.getTableName();
  try {
    CarbonTimeStatisticsFactory.getLoadStatisticsInstance().recordDictionaryValue2MdkAdd2FileTime(configuration.getPartitionId(), System.currentTimeMillis());
    int i = 0;
    for (Iterator<CarbonRowBatch> iterator : iterators) {
      String storeLocation = getStoreLocation(tableIdentifier, String.valueOf(i));
      // k numbers the batches of the current iterator; every batch gets its own handler.
      int k = 0;
      while (iterator.hasNext()) {
        CarbonRowBatch next = iterator.next();
        CarbonFactDataHandlerModel model = CarbonFactDataHandlerModel.createCarbonFactDataHandlerModel(configuration, storeLocation, i, k++);
        CarbonFactHandler dataHandler = CarbonFactHandlerFactory.createCarbonFactHandler(model, CarbonFactHandlerFactory.FactHandlerType.COLUMNAR);
        dataHandler.initialise();
        processBatch(next, dataHandler);
        finish(tableName, dataHandler);
      }
      i++;
    }
  } catch (Exception e) {
    LOGGER.error(e, "Failed for table: " + tableName + " in DataWriterBatchProcessorStepImpl");
    // Keep the original exception as the cause so its type and stack trace reach the caller.
    throw new CarbonDataLoadingException("There is an unexpected error: " + e.getMessage(), e);
  }
  return null;
}
Aggregations