Search in sources:

Example 1 with CarbonRowBatch

use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in project carbondata by apache.

Class UnsafeParallelReadMergeSorterImpl, method sort.

/**
 * Sorts the rows supplied by the given iterators and exposes the merged result as batches.
 *
 * <p>One {@code SortIteratorThread} is submitted per input iterator to a fixed thread pool
 * sized to {@code iterators.length}; every task is handed the same {@code UnsafeSortDataRows}
 * instance. After the pool has been shut down and awaited (for at most 2 days),
 * {@code processRowToNextStep} is invoked, the intermediate file merger is finished, and the
 * final merge is started with the row pages and merged pages reported by that merger.
 *
 * <p>Note: {@code Executors.newFixedThreadPool} rejects a thread count of zero, so an empty
 * {@code iterators} array fails with {@code IllegalArgumentException}.
 *
 * @param iterators input iterators; one sort task is created for each element
 * @return a single-element array whose iterator reads from the final merger and yields batches
 *         of at most the configured batch size
 * @throws CarbonDataLoadingException if initializing the sorter fails, if submitting/awaiting the
 *         sort tasks or {@code processRowToNextStep} throws (including interruption of the
 *         calling thread), or if finishing the intermediate merge / starting the final merge fails
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
    int inMemoryChunkSizeInMB = CarbonProperties.getInstance().getSortMemoryChunkSizeInMB();
    UnsafeSortDataRows sortDataRow = new UnsafeSortDataRows(sortParameters, unsafeIntermediateFileMerger, inMemoryChunkSizeInMB);
    final int batchSize = CarbonProperties.getInstance().getBatchSize();
    try {
        sortDataRow.initialize();
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    this.executorService = Executors.newFixedThreadPool(iterators.length);
    this.threadStatusObserver = new ThreadStatusObserver(this.executorService);
    try {
        for (int i = 0; i < iterators.length; i++) {
            executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize, rowCounter, this.threadStatusObserver));
        }
        // No further tasks are accepted; block until the submitted sort tasks complete.
        executorService.shutdown();
        executorService.awaitTermination(2, TimeUnit.DAYS);
        processRowToNextStep(sortDataRow, sortParameters);
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the thread's interrupt status; restore it so
        // that callers further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    } catch (Exception e) {
        // NOTE(review): checkError() presumably rethrows a failure recorded by a worker thread
        // in preference to this wrapper exception - confirm against its definition.
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    }
    checkError();
    try {
        unsafeIntermediateFileMerger.finish();
        List<UnsafeCarbonRowPage> rowPages = unsafeIntermediateFileMerger.getRowPages();
        finalMerger.startFinalMerge(rowPages.toArray(new UnsafeCarbonRowPage[rowPages.size()]), unsafeIntermediateFileMerger.getMergedPages());
    } catch (CarbonDataWriterException e) {
        throw new CarbonDataLoadingException(e);
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    // Creates the iterator to read from merge sorter.
    Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {

        @Override
        public boolean hasNext() {
            return finalMerger.hasNext();
        }

        /**
         * Drains up to {@code batchSize} rows from the final merger into a new batch. If the
         * merger is already exhausted the returned batch is empty.
         */
        @Override
        public CarbonRowBatch next() {
            int counter = 0;
            CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
            while (finalMerger.hasNext() && counter < batchSize) {
                rowBatch.addRow(new CarbonRow(finalMerger.next()));
                counter++;
            }
            return rowBatch;
        }
    };
    // Generic arrays cannot be created directly, hence the raw array type.
    return new Iterator[] { batchIterator };
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow) UnsafeCarbonRowPage(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonIterator(org.apache.carbondata.common.CarbonIterator) CarbonIterator(org.apache.carbondata.common.CarbonIterator) Iterator(java.util.Iterator) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) UnsafeSortDataRows(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeSortDataRows)

Example 2 with CarbonRowBatch

use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in project carbondata by apache.

Class ParallelReadMergeSorterImpl, method sort.

/**
 * Sorts the rows supplied by the given iterators and exposes the merged result as batches.
 *
 * <p>One {@code SortIteratorThread} is submitted per input iterator to a fixed thread pool
 * sized to {@code iterators.length}; every task is handed the same {@code SortDataRows}
 * instance. After the pool has been shut down and awaited (for at most 2 days),
 * {@code processRowToNextStep} is invoked, the intermediate file merger is finished and its
 * field reference cleared, and the final merge is started.
 *
 * <p>Note: {@code Executors.newFixedThreadPool} rejects a thread count of zero, so an empty
 * {@code iterators} array fails with {@code IllegalArgumentException}.
 *
 * @param iterators input iterators; one sort task is created for each element
 * @return a single-element array whose iterator reads from the final merger and yields batches
 *         of at most the configured batch size
 * @throws CarbonDataLoadingException if initializing the sorter fails, if submitting/awaiting the
 *         sort tasks or {@code processRowToNextStep} throws (including interruption of the
 *         calling thread), or if finishing the intermediate merge / starting the final merge fails
 */
@Override
public Iterator<CarbonRowBatch>[] sort(Iterator<CarbonRowBatch>[] iterators) throws CarbonDataLoadingException {
    SortDataRows sortDataRow = new SortDataRows(sortParameters, intermediateFileMerger);
    final int batchSize = CarbonProperties.getInstance().getBatchSize();
    try {
        sortDataRow.initialize();
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    this.executorService = Executors.newFixedThreadPool(iterators.length);
    this.threadStatusObserver = new ThreadStatusObserver(executorService);
    try {
        for (int i = 0; i < iterators.length; i++) {
            executorService.submit(new SortIteratorThread(iterators[i], sortDataRow, batchSize, rowCounter, threadStatusObserver));
        }
        // No further tasks are accepted; block until the submitted sort tasks complete.
        executorService.shutdown();
        executorService.awaitTermination(2, TimeUnit.DAYS);
        processRowToNextStep(sortDataRow, sortParameters);
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the thread's interrupt status; restore it so
        // that callers further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    } catch (Exception e) {
        // NOTE(review): checkError() presumably rethrows a failure recorded by a worker thread
        // in preference to this wrapper exception - confirm against its definition.
        checkError();
        throw new CarbonDataLoadingException("Problem while shutdown the server ", e);
    }
    checkError();
    try {
        intermediateFileMerger.finish();
        // Drop the reference once the intermediate merge has finished.
        intermediateFileMerger = null;
        finalMerger.startFinalMerge();
    } catch (CarbonDataWriterException e) {
        throw new CarbonDataLoadingException(e);
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataLoadingException(e);
    }
    // Creates the iterator to read from merge sorter.
    Iterator<CarbonRowBatch> batchIterator = new CarbonIterator<CarbonRowBatch>() {

        @Override
        public boolean hasNext() {
            return finalMerger.hasNext();
        }

        /**
         * Drains up to {@code batchSize} rows from the final merger into a new batch. If the
         * merger is already exhausted the returned batch is empty.
         */
        @Override
        public CarbonRowBatch next() {
            int counter = 0;
            CarbonRowBatch rowBatch = new CarbonRowBatch(batchSize);
            while (finalMerger.hasNext() && counter < batchSize) {
                rowBatch.addRow(new CarbonRow(finalMerger.next()));
                counter++;
            }
            return rowBatch;
        }
    };
    // Generic arrays cannot be created directly, hence the raw array type.
    return new Iterator[] { batchIterator };
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) SortDataRows(org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows) CarbonIterator(org.apache.carbondata.common.CarbonIterator) CarbonIterator(org.apache.carbondata.common.CarbonIterator) Iterator(java.util.Iterator) CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException)

Example 3 with CarbonRowBatch

use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in project carbondata by apache.

Class DataConverterProcessorStepImpl, method processRowBatch.

/**
 * Converts every remaining row of the given batch and collects the results in a new batch.
 *
 * <p>The input batch is consumed through its {@code hasNext()}/{@code next()} cursor, and the
 * step's row counter is increased by the size of the resulting batch.
 *
 * @param rowBatch batch whose rows are to be converted
 * @param localConverter converter applied to each row
 * @return a new batch holding the converted rows
 */
protected CarbonRowBatch processRowBatch(CarbonRowBatch rowBatch, RowConverter localConverter) {
    final CarbonRowBatch convertedBatch = new CarbonRowBatch(rowBatch.getSize());
    for (; rowBatch.hasNext(); ) {
        convertedBatch.addRow(localConverter.convert(rowBatch.next()));
    }
    // Account for the rows produced by this call.
    final int convertedCount = convertedBatch.getSize();
    rowCounter.getAndAdd(convertedCount);
    return convertedBatch;
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch)

Example 4 with CarbonRowBatch

use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in project carbondata by apache.

Class DataConverterProcessorWithBucketingStepImpl, method processRowBatch.

/**
 * Converts every remaining row of the given batch, tags each converted row with a bucket
 * number, and collects the results in a new batch.
 *
 * <p>The bucket number is obtained from the partitioner using the data of the row as it was
 * before conversion. The step's row counter is increased by the size of the resulting batch.
 *
 * @param rowBatch batch whose rows are to be converted
 * @param localConverter converter applied to each row
 * @return a new batch holding the converted rows, each with its bucket number set
 */
protected CarbonRowBatch processRowBatch(CarbonRowBatch rowBatch, RowConverter localConverter) {
    final CarbonRowBatch bucketedBatch = new CarbonRowBatch(rowBatch.getSize());
    for (; rowBatch.hasNext(); ) {
        final CarbonRow rawRow = rowBatch.next();
        // Resolve the bucket from the unconverted data before handing the row to the converter.
        final short bucket = (short) partitioner.getPartition(rawRow.getData());
        final CarbonRow converted = localConverter.convert(rawRow);
        converted.bucketNumber = bucket;
        bucketedBatch.addRow(converted);
    }
    // Account for the rows produced by this call.
    final int convertedCount = bucketedBatch.getSize();
    rowCounter.getAndAdd(convertedCount);
    return bucketedBatch;
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch) CarbonRow(org.apache.carbondata.processing.newflow.row.CarbonRow)

Example 5 with CarbonRowBatch

use of org.apache.carbondata.processing.newflow.row.CarbonRowBatch in project carbondata by apache.

Class DataWriterBatchProcessorStepImpl, method execute.

/**
 * Executes the child step and writes every batch it produces through its own fact handler.
 *
 * <p>For each iterator returned by the child (indexed by {@code i}) a store location is
 * resolved once. For each batch of that iterator (indexed by {@code k}) a new
 * {@code CarbonFactDataHandlerModel} and a COLUMNAR {@code CarbonFactHandler} are created, the
 * handler is initialised, the batch is passed to {@code processBatch}, and {@code finish} is
 * called for the handler.
 *
 * @return always {@code null}; this step does not hand any iterators to a following step
 * @throws CarbonDataLoadingException if any exception is raised while writing; the original
 *         exception is attached as the cause
 */
@Override
public Iterator<CarbonRowBatch>[] execute() throws CarbonDataLoadingException {
    Iterator<CarbonRowBatch>[] iterators = child.execute();
    CarbonTableIdentifier tableIdentifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    String tableName = tableIdentifier.getTableName();
    try {
        CarbonTimeStatisticsFactory.getLoadStatisticsInstance().recordDictionaryValue2MdkAdd2FileTime(configuration.getPartitionId(), System.currentTimeMillis());
        int i = 0;
        for (Iterator<CarbonRowBatch> iterator : iterators) {
            String storeLocation = getStoreLocation(tableIdentifier, String.valueOf(i));
            // k numbers the batches of the current iterator; it restarts at 0 for each iterator.
            int k = 0;
            while (iterator.hasNext()) {
                CarbonRowBatch next = iterator.next();
                CarbonFactDataHandlerModel model = CarbonFactDataHandlerModel.createCarbonFactDataHandlerModel(configuration, storeLocation, i, k++);
                CarbonFactHandler dataHandler = CarbonFactHandlerFactory.createCarbonFactHandler(model, CarbonFactHandlerFactory.FactHandlerType.COLUMNAR);
                dataHandler.initialise();
                processBatch(next, dataHandler);
                finish(tableName, dataHandler);
            }
            i++;
        }
    } catch (Exception e) {
        LOGGER.error(e, "Failed for table: " + tableName + " in DataWriterBatchProcessorStepImpl");
        // Chain the original exception so its type and stack trace are not lost to callers.
        throw new CarbonDataLoadingException("There is an unexpected error: " + e.getMessage(), e);
    }
    return null;
}
Also used : CarbonRowBatch(org.apache.carbondata.processing.newflow.row.CarbonRowBatch) CarbonFactDataHandlerModel(org.apache.carbondata.processing.store.CarbonFactDataHandlerModel) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException) Iterator(java.util.Iterator) CarbonFactHandler(org.apache.carbondata.processing.store.CarbonFactHandler) IOException(java.io.IOException) CarbonDataLoadingException(org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException)

Aggregations

CarbonRowBatch (org.apache.carbondata.processing.newflow.row.CarbonRowBatch)6 Iterator (java.util.Iterator)4 CarbonDataLoadingException (org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException)4 CarbonRow (org.apache.carbondata.processing.newflow.row.CarbonRow)3 CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)3 IOException (java.io.IOException)2 CarbonIterator (org.apache.carbondata.common.CarbonIterator)2 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)2 CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException)2 CarbonFactDataHandlerModel (org.apache.carbondata.processing.store.CarbonFactDataHandlerModel)2 CarbonFactHandler (org.apache.carbondata.processing.store.CarbonFactHandler)2 UnsafeCarbonRowPage (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage)1 UnsafeSortDataRows (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeSortDataRows)1 SortDataRows (org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows)1