Search in sources :

Example 1 with CarbonRow

use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.

The class UnsafeParallelReadMergeSorterImpl, method sort.

@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
    // Sorts all incoming row batches with the unsafe (off-heap) sorter, merges the
    // intermediate results, and exposes the merged output as one batched iterator.
    int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
    UnsafeSortDataRows sortDataRow = new UnsafeSortDataRows(sortParameters, unsafeIntermediateFileMerger, inMemoryChunkSizeInMB);
    final int batchSize = CarbonProperties.getInstance().getBatchSize();
    try {
        sortDataRow.initialize();
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    // One worker thread per input iterator; all workers feed the shared sortDataRow.
    this.executorService = Executors.newFixedThreadPool(iterators.length);
    this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
    try {
        for (int i = 0; i < iterators.length; i++) {
            executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize, rowCounter, this.threadStatusObserver));
        }
        executorService.shutdown();
        executorService.awaitTermination(2, TimeUnit.DAYS);
        processRowToNextStep(sortDataRow, sortParameters);
    } catch (Exception e) {
        // If awaitTermination was interrupted, restore the interrupt status so
        // callers further up the stack can observe it.
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    }
    checkError();
    try {
        unsafeIntermediateFileMerger.finish();
        List<UnsafeCarbonRowPage> rowPages = unsafeIntermediateFileMerger.getRowPages();
        finalMerger.startFinalMerge(rowPages.toArray(new UnsafeCarbonRowPage[rowPages.size()]), unsafeIntermediateFileMerger.getMergedPages());
    } catch (CarbonDataWriterException | CarbonSortKeyAndGroupByException e) {
        // Both failure modes are surfaced identically to the caller.
        throw new CarbonDataLoadingException(e);
    }
    // Creates the iterator to read from merge sorter.
    Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {

        @Override
        public boolean hasNext() {
            return finalMerger.hasNext();
        }

        @Override
        public CarbonRowBatch next() {
            // Drain up to batchSize rows from the final merger into one batch.
            int counter = 0;
            CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
            while (finalMerger.hasNext() && counter < batchSize) {
                rowBatch.addRow(new CarbonRow(finalMerger.next()));
                counter++;
            }
            return rowBatch;
        }
    };
    return new Iterator[] { batchIterator };
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow) UnsafeCarbonRowPage(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonIterator(org.apache.carbondata.common.CarbonIterator) CarbonIterator(org.apache.carbondata.common.CarbonIterator) Iterator(java.util.Iterator) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) UnsafeSortDataRows(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeSortDataRows)

Example 2 with CarbonRow

use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.

The class ParallelReadMergeSorterImpl, method sort.

@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
    // Sorts all incoming row batches, merges the intermediate sorted files, and
    // exposes the merged output as one batched iterator.
    SortDataRows sortDataRow = new SortDataRows(sortParameters, intermediateFileMerger);
    final int batchSize = CarbonProperties.getInstance().getBatchSize();
    try {
        sortDataRow.initialize();
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    // One worker thread per input iterator; all workers feed the shared sortDataRow.
    this.executorService = Executors.newFixedThreadPool(iterators.length);
    this.threadStatusObserver = new ThreadStatusObserver(executorService);
    try {
        for (int i = 0; i < iterators.length; i++) {
            executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize, rowCounter, threadStatusObserver));
        }
        executorService.shutdown();
        executorService.awaitTermination(2, TimeUnit.DAYS);
        processRowToNextStep(sortDataRow, sortParameters);
    } catch (Exception e) {
        // If awaitTermination was interrupted, restore the interrupt status so
        // callers further up the stack can observe it.
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    }
    checkError();
    try {
        intermediateFileMerger.finish();
        // Release the merger eagerly; it is no longer needed after finish().
        intermediateFileMerger = null;
        finalMerger.startFinalMerge();
    } catch (CarbonDataWriterException | CarbonSortKeyAndGroupByException e) {
        // Both failure modes are surfaced identically to the caller.
        throw new CarbonDataLoadingException(e);
    }
    // Creates the iterator to read from merge sorter.
    Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {

        @Override
        public boolean hasNext() {
            return finalMerger.hasNext();
        }

        @Override
        public CarbonRowBatch next() {
            // Drain up to batchSize rows from the final merger into one batch.
            int counter = 0;
            CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
            while (finalMerger.hasNext() && counter < batchSize) {
                rowBatch.addRow(new CarbonRow(finalMerger.next()));
                counter++;
            }
            return rowBatch;
        }
    };
    return new Iterator[] { batchIterator };
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) SortDataRows(org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows) CarbonIterator(org.apache.carbondata.common.CarbonIterator) CarbonIterator(org.apache.carbondata.common.CarbonIterator) Iterator(java.util.Iterator) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException)

Example 3 with CarbonRow

use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.

The class DataWriterBatchProcessorStepImpl, method processBatch.

private void processBatch(CarbonRowBatch batch, CarbonFactHandler dataHandler) throws Exception {
    // Feed every row of the batch into the fact handler, counting rows as we go,
    // then publish the count to the shared row counter in a single add.
    int rowsWritten = 0;
    for (; batch.hasNext(); rowsWritten++) {
        dataHandler.addDataToStore(batch.next());
    }
    rowCounter.getAndAdd(rowsWritten);
}
Also used : CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow)

Example 4 with CarbonRow

use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.

The class DataWriterProcessorStepImpl, method processBatch.

private void processBatch(CarbonRowBatch batch, CarbonFactHandler dataHandler) throws CarbonDataLoadingException {
    // Push each row of the batch into the fact handler, tracking how many rows
    // were read; any failure is wrapped as a data-loading exception.
    try {
        while (batch.hasNext()) {
            dataHandler.addDataToStore(batch.next());
            readCounter++;
        }
    } catch (Exception e) {
        throw new CarbonDataLoadingException("unable to generate the mdkey", e);
    }
    // Note: the shared counter is advanced by the batch size, not readCounter.
    rowCounter.getAndAdd(batch.getSize());
}
Also used : CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) IOException(java.io.IOException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException)

Example 5 with CarbonRow

use of org.apache.carbondata.processing.newflow.row.CarbonRow in project carbondata by apache.

The class CarbonFactDataHandlerColumnar, method processDataRows.

/**
   * generate the NodeHolder from the input rows (one page in case of V3 format)
   *
   * @param dataRows rows belonging to a single page
   * @return a NodeHolder built from the encoded and compressed page data;
   *         an empty NodeHolder when {@code dataRows} is empty
   * @throws CarbonDataWriterException if building the node holder fails
   * @throws KeyGenException if key generation fails
   */
private NodeHolder processDataRows(List<CarbonRow> dataRows) throws CarbonDataWriterException, KeyGenException {
    // Guard: an empty page yields an empty holder (isEmpty() over size() == 0).
    if (dataRows.isEmpty()) {
        return new NodeHolder();
    }
    TablePage tablePage = new TablePage(model, dataRows.size());
    IndexKey keys = new IndexKey(dataRows.size());
    int rowId = 0;
    // convert row to columnar data
    for (CarbonRow row : dataRows) {
        tablePage.addRow(rowId, row);
        keys.update(rowId, row);
        rowId++;
    }
    // encode and compress dimensions and measure
    // TODO: To make the encoding more transparent to the user, user should be enable to specify
    // the encoding and compression method for each type when creating table.
    Codec codec = new Codec(model.getMeasureDataType());
    IndexStorage[] dimColumns = codec.encodeAndCompressDimensions(tablePage);
    Codec encodedMeasure = codec.encodeAndCompressMeasures(tablePage);
    // prepare nullBitSet for writer, remove this after writer can accept TablePage
    BitSet[] nullBitSet = new BitSet[tablePage.getMeasurePage().length];
    FixLengthColumnPage[] measurePages = tablePage.getMeasurePage();
    for (int i = 0; i < nullBitSet.length; i++) {
        nullBitSet[i] = measurePages[i].getNullBitSet();
    }
    LOGGER.info("Number Of records processed: " + dataRows.size());
    // TODO: writer interface should be modified to use TablePage
    return dataWriter.buildDataNodeHolder(dimColumns, encodedMeasure.getEncodedMeasure(), dataRows.size(), keys.startKey, keys.endKey, encodedMeasure.getCompressionModel(), keys.packedNoDictStartKey, keys.packedNoDictEndKey, nullBitSet);
}
Also used : CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow) BitSet(java.util.BitSet) FixLengthColumnPage(org.apache.carbondata.core.datastore.page.FixLengthColumnPage) IndexStorage(org.apache.carbondata.core.datastore.columnar.IndexStorage) NodeHolder(org.apache.carbondata.core.util.NodeHolder)

Aggregations

CarbonRow (org.apache.carbondata.processing.newflow.row.CarbonRow)9 CarbonDataLoadingException (org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException)5 CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)5 IOException (java.io.IOException)3 CarbonRowBatch (org.apache.carbondata.processing.newflow.row.CarbonRowBatch)3 CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException)3 Iterator (java.util.Iterator)2 CarbonIterator (org.apache.carbondata.common.CarbonIterator)2 BitSet (java.util.BitSet)1 IndexStorage (org.apache.carbondata.core.datastore.columnar.IndexStorage)1 FixLengthColumnPage (org.apache.carbondata.core.datastore.page.FixLengthColumnPage)1 KeyGenException (org.apache.carbondata.core.keygenerator.KeyGenException)1 NodeHolder (org.apache.carbondata.core.util.NodeHolder)1 UnsafeCarbonRowPage (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage)1 UnsafeSortDataRows (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeSortDataRows)1 SortDataRows (org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows)1