Use of org.apache.carbondata.core.datastore.row.CarbonRow in project carbondata by Apache.
The class DataConverterProcessorStepImpl, method initializeSortColumnRangesPartitioner.
/**
 * Initialize partitioner for sort column ranges.
 */
private void initializeSortColumnRangesPartitioner() {
  // convert user specified sort-column ranges
  SortColumnRangeInfo sortColumnRangeInfo = configuration.getSortColumnRangeInfo();
  int rangeValueCnt = sortColumnRangeInfo.getUserSpecifiedRanges().length;
  CarbonRow[] convertedSortColumnRanges = new CarbonRow[rangeValueCnt];
  for (int i = 0; i < rangeValueCnt; i++) {
    Object[] fakeOriginRow = new Object[configuration.getDataFields().length];
    String[] oneBound = StringUtils.splitPreserveAllTokens(
        sortColumnRangeInfo.getUserSpecifiedRanges()[i], sortColumnRangeInfo.getSeparator(), -1);
    // set the corresponding sort column
    int j = 0;
    for (int colIdx : sortColumnRangeInfo.getSortColumnIndex()) {
      fakeOriginRow[colIdx] = oneBound[j++];
    }
    CarbonRow fakeCarbonRow = new CarbonRow(fakeOriginRow);
    convertFakeRow(fakeCarbonRow, sortColumnRangeInfo);
    convertedSortColumnRanges[i] = fakeCarbonRow;
  }
  // sort the range bounds (sort in carbon is a little different from what we think)
  Arrays.sort(convertedSortColumnRanges,
      new RawRowComparator(sortColumnRangeInfo.getSortColumnIndex(),
          sortColumnRangeInfo.getIsSortColumnNoDict()));
  // range partitioner to dispatch rows by sort columns
  this.partitioner = new RangePartitionerImpl(convertedSortColumnRanges,
      new RawRowComparator(sortColumnRangeInfo.getSortColumnIndex(),
          sortColumnRangeInfo.getIsSortColumnNoDict()));
}
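For readers unfamiliar with range partitioning on sort columns, here is a minimal, self-contained sketch of the idea behind the snippet above: user-specified bounds are parsed, sorted, and each incoming row is dispatched to a partition by a binary search over the bounds. It deliberately avoids the CarbonData API; the class RangeSketch, the dispatch method, and the single-string rows are illustrative assumptions, not project code.

import java.util.Arrays;
import java.util.Comparator;

public class RangeSketch {
  public static void main(String[] args) {
    // hypothetical user-specified range bounds on a single sort column
    String[] userRanges = {"m", "d", "t"};
    Comparator<String> cmp = Comparator.naturalOrder();
    // sort the bounds so binary search over them is valid, mirroring the
    // Arrays.sort(...) call on convertedSortColumnRanges above
    String[] bounds = userRanges.clone();
    Arrays.sort(bounds, cmp);
    for (String row : new String[] {"apple", "melon", "zebra"}) {
      System.out.println(row + " -> partition " + dispatch(row, bounds, cmp));
    }
  }

  // rows below the first bound fall into partition 0; a row between bounds[i-1]
  // and bounds[i] falls into partition i; rows at or above the last bound fall
  // into the last partition (bounds.length)
  static int dispatch(String row, String[] bounds, Comparator<String> cmp) {
    int idx = Arrays.binarySearch(bounds, row, cmp);
    return idx < 0 ? -(idx + 1) : idx + 1;
  }
}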
Use of org.apache.carbondata.core.datastore.row.CarbonRow in project carbondata by Apache.
The class DataConverterProcessorStepImpl, method processRowBatch.
/**
 * Process the batch of rows as per the step logic.
 *
 * @param rowBatch batch of rows to convert
 * @param localConverter converter applied to each row
 * @return the processed row batch
 */
protected CarbonRowBatch processRowBatch(CarbonRowBatch rowBatch, RowConverter localConverter) {
  while (rowBatch.hasNext()) {
    CarbonRow convertRow = localConverter.convert(rowBatch.next());
    if (isSortColumnRangeEnabled || isBucketColumnEnabled) {
      short rangeNumber = (short) partitioner.getPartition(convertRow);
      convertRow.setRangeId(rangeNumber);
    }
    rowBatch.setPreviousRow(convertRow);
  }
  rowCounter.getAndAdd(rowBatch.getSize());
  // reuse the origin batch
  rowBatch.rewind();
  return rowBatch;
}
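The notable pattern above is that the batch is converted in place and then rewound so the same object can be handed to the next step. The following tiny stand-in (RowBatch and BatchDemo are hypothetical names, not the CarbonData CarbonRowBatch API) illustrates that convert-then-rewind flow under those assumptions.

import java.util.ArrayList;
import java.util.List;

class RowBatch {
  private final List<Object[]> rows = new ArrayList<>();
  private int cursor = 0;

  void add(Object[] row)            { rows.add(row); }
  boolean hasNext()                 { return cursor < rows.size(); }
  Object[] next()                   { return rows.get(cursor++); }
  void setPreviousRow(Object[] row) { rows.set(cursor - 1, row); }
  void rewind()                     { cursor = 0; }
  int getSize()                     { return rows.size(); }
}

public class BatchDemo {
  public static void main(String[] args) {
    RowBatch batch = new RowBatch();
    batch.add(new Object[] {"raw-1"});
    batch.add(new Object[] {"raw-2"});
    // "convert" each row in place, as processRowBatch does with the RowConverter
    while (batch.hasNext()) {
      Object[] converted = new Object[] {batch.next()[0] + "-converted"};
      batch.setPreviousRow(converted);
    }
    // rewind the cursor so a downstream step can re-read the same batch object
    batch.rewind();
    while (batch.hasNext()) {
      System.out.println(batch.next()[0]);
    }
    System.out.println("rows in batch: " + batch.getSize());
  }
}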
Use of org.apache.carbondata.core.datastore.row.CarbonRow in project carbondata by Apache.
The class DataWriterBatchProcessorStepImpl, method processBatch.
private void processBatch(CarbonRowBatch batch, CarbonFactHandler dataHandler) throws Exception {
  int batchSize = 0;
  while (batch.hasNext()) {
    CarbonRow row = batch.next();
    dataHandler.addDataToStore(row);
    batchSize++;
  }
  batch.close();
  rowCounter.getAndAdd(batchSize);
}
Use of org.apache.carbondata.core.datastore.row.CarbonRow in project carbondata by Apache.
The class DataWriterProcessorStepImpl, method processBatch.
private void processBatch(CarbonRowBatch batch, CarbonFactHandler dataHandler)
    throws CarbonDataLoadingException {
  try {
    while (batch.hasNext()) {
      CarbonRow row = batch.next();
      dataHandler.addDataToStore(row);
      readCounter++;
    }
  } catch (Exception e) {
    throw new CarbonDataLoadingException(e);
  }
  rowCounter.getAndAdd(batch.getSize());
}
Use of org.apache.carbondata.core.datastore.row.CarbonRow in project carbondata by Apache.
The class CarbonFactDataHandlerColumnar, method processDataRows.
/**
 * Generate the EncodedTablePage from the input rows (one page in case of V3 format).
 */
private TablePage processDataRows(List<CarbonRow> dataRows)
    throws CarbonDataWriterException, KeyGenException, MemoryException, IOException {
  if (dataRows.size() == 0) {
    return new TablePage(model, 0);
  }
  TablePage tablePage = new TablePage(model, dataRows.size());
  int rowId = 0;
  // convert row to columnar data
  for (CarbonRow row : dataRows) {
    tablePage.addRow(rowId++, row);
  }
  tablePage.encode();
  LOGGER.info("Number Of records processed: " + dataRows.size());
  return tablePage;
}