use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.
the class UnsafeParallelReadMergeSorterImpl method sort.
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators)
    throws CarbonDataLoadingException {
  int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
  UnsafeSortDataRows sortDataRow =
      new UnsafeSortDataRows(sortParameters, unsafeIntermediateFileMerger, inMemoryChunkSizeInMB);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    sortDataRow.initialize();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize,
          rowCounter, this.threadStatusObserver));
    }
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRow, sortParameters);
  } catch (Exception e) {
    checkError();
    throw new CarbonDataLoadingException("Problem while shutting down the executor service", e);
  }
  checkError();
  try {
    unsafeIntermediateFileMerger.finish();
    List<UnsafeCarbonRowPage> rowPages = unsafeIntermediateFileMerger.getRowPages();
    finalMerger.startFinalMerge(rowPages.toArray(new UnsafeCarbonRowPage[rowPages.size()]),
        unsafeIntermediateFileMerger.getMergedPages());
  } catch (CarbonDataWriterException e) {
    throw new CarbonDataLoadingException(e);
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // Creates the iterator to read from the merge sorter.
  Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {
    @Override
    public boolean hasNext() {
      return finalMerger.hasNext();
    }

    @Override
    public CarbonRowBatch next() {
      int counter = 0;
      CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
      while (finalMerger.hasNext() && counter < batchSize) {
        rowBatch.addRow(new CarbonRow(finalMerger.next()));
        counter++;
      }
      return rowBatch;
    }
  };
  return new Iterator[] { batchIterator };
}
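The anonymous CarbonIterator at the end implements a common batching pattern: drain up to batchSize elements from an exhaustible source into each emitted batch. A minimal, self-contained sketch of the same pattern with generic types (all names here are illustrative, not CarbonData API):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

// Illustrative only: wraps any iterator and yields fixed-size batches,
// mirroring the anonymous iterator returned by sort() above.
final class BatchingIterator<T> implements Iterator<List<T>> {
  private final Iterator<T> source;
  private final int batchSize;

  BatchingIterator(Iterator<T> source, int batchSize) {
    this.source = source;
    this.batchSize = batchSize;
  }

  @Override
  public boolean hasNext() {
    return source.hasNext();
  }

  @Override
  public List<T> next() {
    if (!source.hasNext()) {
      throw new NoSuchElementException();
    }
    List<T> batch = new ArrayList<>(batchSize);
    // Same loop shape as the CarbonRowBatch filling logic above.
    while (source.hasNext() && batch.size() < batchSize) {
      batch.add(source.next());
    }
    return batch;
  }
}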
use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.
the class ParallelReadMergeSorterImpl method sort.
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators)
    throws CarbonDataLoadingException {
  SortDataRows sortDataRow = new SortDataRows(sortParameters, intermediateFileMerger);
  final int batchSize = CarbonProperties.getInstance().getBatchSize();
  try {
    sortDataRow.initialize();
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  this.executorService = Executors.newFixedThreadPool(iterators.length);
  this.threadStatusObserver = new ThreadStatusObserver(executorService);
  try {
    for (int i = 0; i < iterators.length; i++) {
      executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize,
          rowCounter, threadStatusObserver));
    }
    executorService.shutdown();
    executorService.awaitTermination(2, TimeUnit.DAYS);
    processRowToNextStep(sortDataRow, sortParameters);
  } catch (Exception e) {
    checkError();
    throw new CarbonDataLoadingException("Problem while shutting down the executor service", e);
  }
  checkError();
  try {
    intermediateFileMerger.finish();
    intermediateFileMerger = null;
    finalMerger.startFinalMerge();
  } catch (CarbonDataWriterException e) {
    throw new CarbonDataLoadingException(e);
  } catch (CarbonSortKeyAndGroupByException e) {
    throw new CarbonDataLoadingException(e);
  }
  // Creates the iterator to read from the merge sorter.
  Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {
    @Override
    public boolean hasNext() {
      return finalMerger.hasNext();
    }

    @Override
    public CarbonRowBatch next() {
      int counter = 0;
      CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
      while (finalMerger.hasNext() && counter < batchSize) {
        rowBatch.addRow(new CarbonRow(finalMerger.next()));
        counter++;
      }
      return rowBatch;
    }
  };
  return new Iterator[] { batchIterator };
}
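Both sorter implementations share the same fan-out shape: one worker task per input iterator, a bounded wait for completion, then a single merge phase. A reduced sketch of that orchestration (hypothetical names; error handling deliberately simplified compared to the real checkError()/ThreadStatusObserver machinery):

import java.util.Iterator;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

final class FanOutDrainDemo {
  // Drains every input iterator on its own thread, mirroring the
  // submit/shutdown/awaitTermination sequence in the sort() methods above.
  static <T> void drainAll(Iterator<T>[] inputs, Consumer<T> sink)
      throws InterruptedException {
    ExecutorService pool = Executors.newFixedThreadPool(inputs.length);
    for (Iterator<T> input : inputs) {
      pool.submit(() -> {
        while (input.hasNext()) {
          sink.accept(input.next()); // sink must be thread-safe
        }
      });
    }
    pool.shutdown();                         // stop accepting new tasks
    pool.awaitTermination(2, TimeUnit.DAYS); // generous upper bound, as above
  }
}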
use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.
the class DataWriterBatchProcessorStepImpl method processBatch.
private void processBatch(CarbonRowBatch batch, CarbonFactHandler dataHandler) throws Exception {
  int batchSize = 0;
  while (batch.hasNext()) {
    CarbonRow row = batch.next();
    dataHandler.addDataToStore(row);
    batchSize++;
  }
  rowCounter.getAndAdd(batchSize);
}
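Note that rowCounter is updated once per batch rather than once per row, which keeps contention on the shared counter low when several writer threads run in parallel. A small illustration of the difference, assuming rowCounter is an AtomicLong (a plausible choice for such a counter, not verified against the CarbonData field declaration):

import java.util.concurrent.atomic.AtomicLong;

final class CounterDemo {
  private static final AtomicLong rowCounter = new AtomicLong();

  // One atomic update per batch: a single contended write per batch,
  // matching the pattern in processBatch() above.
  static void countPerBatch(int rowsInBatch) {
    rowCounter.getAndAdd(rowsInBatch);
  }

  // One atomic update per row: equally correct, but N times more contention.
  static void countPerRow(int rowsInBatch) {
    for (int i = 0; i < rowsInBatch; i++) {
      rowCounter.incrementAndGet();
    }
  }
}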
use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.
the class DataWriterProcessorStepImpl method processBatch.
private void processBatch(CarbonRowBatch batch, CarbonFactHandler dataHandler)
    throws CarbonDataLoadingException {
  try {
    while (batch.hasNext()) {
      CarbonRow row = batch.next();
      dataHandler.addDataToStore(row);
      readCounter++;
    }
  } catch (Exception e) {
    throw new CarbonDataLoadingException("unable to generate the mdkey", e);
  }
  rowCounter.getAndAdd(batch.getSize());
}
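Unlike the batch-processor variant above, this method counts consumed rows in readCounter but advances the shared rowCounter by batch.getSize() afterwards, so it relies on the batch knowing its total size independently of cursor position. A minimal stand-in for that contract (illustrative only, not the actual CarbonRowBatch source):

import java.util.Iterator;
import java.util.List;

// Illustrative stand-in for CarbonRowBatch: an iterator over rows that
// also reports its total size regardless of how far iteration has advanced.
final class RowBatch<T> {
  private final List<T> rows;
  private final Iterator<T> cursor;

  RowBatch(List<T> rows) {
    this.rows = rows;
    this.cursor = rows.iterator();
  }

  boolean hasNext() { return cursor.hasNext(); }

  T next() { return cursor.next(); }

  // Total row count, independent of the cursor position.
  int getSize() { return rows.size(); }
}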
use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.
the class CarbonFactDataHandlerColumnar method processDataRows.
/**
 * Generate the NodeHolder from the input rows (one page in case of V3 format).
 */
private NodeHolder processDataRows(List<CarbonRow> dataRows)
    throws CarbonDataWriterException, KeyGenException {
  if (dataRows.size() == 0) {
    return new NodeHolder();
  }
  TablePage tablePage = new TablePage(model, dataRows.size());
  IndexKey keys = new IndexKey(dataRows.size());
  int rowId = 0;
  // convert rows to columnar data
  for (CarbonRow row : dataRows) {
    tablePage.addRow(rowId, row);
    keys.update(rowId, row);
    rowId++;
  }
  // encode and compress dimensions and measures
  // TODO: To make the encoding more transparent to the user, the user should be able to
  // specify the encoding and compression method for each type when creating the table.
  Codec codec = new Codec(model.getMeasureDataType());
  IndexStorage[] dimColumns = codec.encodeAndCompressDimensions(tablePage);
  Codec encodedMeasure = codec.encodeAndCompressMeasures(tablePage);
  // prepare nullBitSet for the writer; remove this after the writer can accept TablePage
  BitSet[] nullBitSet = new BitSet[tablePage.getMeasurePage().length];
  FixLengthColumnPage[] measurePages = tablePage.getMeasurePage();
  for (int i = 0; i < nullBitSet.length; i++) {
    nullBitSet[i] = measurePages[i].getNullBitSet();
  }
  LOGGER.info("Number of records processed: " + dataRows.size());
  // TODO: the writer interface should be modified to use TablePage
  return dataWriter.buildDataNodeHolder(dimColumns, encodedMeasure.getEncodedMeasure(),
      dataRows.size(), keys.startKey, keys.endKey, encodedMeasure.getCompressionModel(),
      keys.packedNoDictStartKey, keys.packedNoDictEndKey, nullBitSet);
}
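The per-column null handling above (one BitSet per measure page) follows a standard row-to-columnar transpose: values are written positionally into a column array while nulls are tracked in a bitmap. A compact sketch of that idea for a single nullable double measure (hypothetical type; the real TablePage/FixLengthColumnPage cover many data types and encodings):

import java.util.BitSet;

// Illustrative columnar page for one nullable double measure: values are
// stored positionally and nulls in a BitSet, echoing the
// FixLengthColumnPage.getNullBitSet() usage above.
final class DoubleColumnPage {
  private final double[] values;
  private final BitSet nullBits;

  DoubleColumnPage(int pageSize) {
    this.values = new double[pageSize];
    this.nullBits = new BitSet(pageSize);
  }

  void putValue(int rowId, Double value) {
    if (value == null) {
      nullBits.set(rowId);   // mark null; leave the slot as 0.0
    } else {
      values[rowId] = value;
    }
  }

  BitSet getNullBitSet() { return nullBits; }
}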