Search in sources :

Example 1 with SortColumnRangeInfo

use of org.apache.carbondata.core.metadata.schema.SortColumnRangeInfo in project carbondata by apache.

In the class DataLoadProcessBuilder, method setSortColumnInfo:

/**
 * Populates the sort column range info of the load configuration.
 *
 * <p>Range info is built only when the sort scope is LOCAL_SORT, the table has at least one
 * sort column and the load model carries a non-blank sort column bounds string. In every other
 * case the range info is set to {@code null}; a warning is logged if bounds were supplied but
 * are not going to be used.
 *
 * @param carbonTable carbon table whose sort columns are looked up
 * @param loadModel load model providing the sort scope and the sort column bounds string
 * @param configuration configuration that receives the resulting sort column range info
 * @throws CarbonDataLoadingException if a sort column has no matching data field, or if a bound
 *         does not contain exactly one field per sort column
 */
private static void setSortColumnInfo(CarbonTable carbonTable, CarbonLoadModel loadModel, CarbonDataLoadConfiguration configuration) {
    List<String> sortColumnNames = carbonTable.getSortColumns(carbonTable.getTableName());
    SortScopeOptions.SortScope scope = SortScopeOptions.getSortScope(loadModel.getSortScope());

    // bounds are honoured only for local sort on a table with sort columns and non-blank bounds
    boolean boundsUsable = SortScopeOptions.SortScope.LOCAL_SORT.equals(scope)
        && !sortColumnNames.isEmpty()
        && !StringUtils.isBlank(loadModel.getSortColumnsBoundsStr());
    if (!boundsUsable) {
        boolean boundsSupplied = !StringUtils.isBlank(loadModel.getSortColumnsBoundsStr());
        if (boundsSupplied) {
            LOGGER.warn("sort column bounds will be ignored");
        }
        configuration.setSortColumnRangeInfo(null);
        return;
    }

    // for each sort column, record its position among the data fields and
    // whether that field is NOT dictionary encoded
    int sortColumnCount = sortColumnNames.size();
    int[] fieldIndexes = new int[sortColumnCount];
    boolean[] noDictFlags = new boolean[sortColumnCount];
    DataField[] dataFields = configuration.getDataFields();
    int position = 0;
    for (String sortColumnName : sortColumnNames) {
        // first data field whose column name matches, ignoring case
        int matchedField = -1;
        for (int fieldIdx = 0; fieldIdx < dataFields.length; fieldIdx++) {
            if (dataFields[fieldIdx].getColumn().getColName().equalsIgnoreCase(sortColumnName)) {
                matchedField = fieldIdx;
                break;
            }
        }
        if (matchedField < 0) {
            throw new CarbonDataLoadingException("Field " + sortColumnName + " does not exist.");
        }
        fieldIndexes[position] = matchedField;
        noDictFlags[position] = !dataFields[matchedField].hasDictionaryEncoding();
        position++;
    }

    // every bound must carry exactly one value per sort column
    String[] bounds = StringUtils.splitPreserveAllTokens(loadModel.getSortColumnsBoundsStr(), CarbonLoadOptionConstants.SORT_COLUMN_BOUNDS_ROW_DELIMITER, -1);
    for (String oneBound : bounds) {
        String[] boundFields = StringUtils.splitPreserveAllTokens(oneBound, CarbonLoadOptionConstants.SORT_COLUMN_BOUNDS_FIELD_DELIMITER, -1);
        if (boundFields.length == sortColumnCount) {
            continue;
        }
        String message = "The number of field in bounds should be equal to that in sort columns."
            + " Expected " + sortColumnCount
            + ", actual " + boundFields.length + "."
            + " The illegal bound is '" + oneBound + "'.";
        throw new CarbonDataLoadingException(message);
    }

    SortColumnRangeInfo rangeInfo = new SortColumnRangeInfo(fieldIndexes, noDictFlags, bounds, CarbonLoadOptionConstants.SORT_COLUMN_BOUNDS_FIELD_DELIMITER);
    configuration.setSortColumnRangeInfo(rangeInfo);
}
Also used : CarbonDataLoadingException(org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException) SortColumnRangeInfo(org.apache.carbondata.core.metadata.schema.SortColumnRangeInfo) SortScopeOptions(org.apache.carbondata.processing.loading.sort.SortScopeOptions)

Example 2 with SortColumnRangeInfo

use of org.apache.carbondata.core.metadata.schema.SortColumnRangeInfo in project carbondata by apache.

In the class DataConverterProcessorStepImpl, method initializeSortColumnRangesPartitioner:

/**
 * Builds the range partitioner from the sort column ranges held in the configuration.
 *
 * <p>Each user specified range is split into its fields, placed into a row at the positions of
 * the sort columns, converted via {@code convertFakeRow} and collected. The converted rows are
 * sorted and handed to a {@link RangePartitionerImpl}, which is stored in {@code partitioner}.
 */
private void initializeSortColumnRangesPartitioner() {
    SortColumnRangeInfo rangeInfo = configuration.getSortColumnRangeInfo();
    String[] userRanges = rangeInfo.getUserSpecifiedRanges();
    CarbonRow[] boundRows = new CarbonRow[userRanges.length];

    // turn every user specified bound into a converted row
    for (int rangeIdx = 0; rangeIdx < boundRows.length; rangeIdx++) {
        // placeholder row as wide as the data fields; only sort column slots get filled
        Object[] rawValues = new Object[configuration.getDataFields().length];
        String[] boundFields = StringUtils.splitPreserveAllTokens(userRanges[rangeIdx], rangeInfo.getSeparator(), -1);
        int fieldPos = 0;
        for (int targetIdx : rangeInfo.getSortColumnIndex()) {
            rawValues[targetIdx] = boundFields[fieldPos];
            fieldPos++;
        }
        CarbonRow boundRow = new CarbonRow(rawValues);
        convertFakeRow(boundRow, rangeInfo);
        boundRows[rangeIdx] = boundRow;
    }

    // order the bounds with the same comparison the partitioner applies to rows
    RawRowComparator sortingComparator = new RawRowComparator(rangeInfo.getSortColumnIndex(), rangeInfo.getIsSortColumnNoDict());
    Arrays.sort(boundRows, sortingComparator);

    // the partitioner gets its own comparator instance
    RawRowComparator partitionComparator = new RawRowComparator(rangeInfo.getSortColumnIndex(), rangeInfo.getIsSortColumnNoDict());
    this.partitioner = new RangePartitionerImpl(boundRows, partitionComparator);
}
Also used : RawRowComparator(org.apache.carbondata.processing.loading.partition.impl.RawRowComparator) CarbonRow(org.apache.carbondata.core.datastore.row.CarbonRow) SortColumnRangeInfo(org.apache.carbondata.core.metadata.schema.SortColumnRangeInfo) RangePartitionerImpl(org.apache.carbondata.processing.loading.partition.impl.RangePartitionerImpl)

Aggregations

SortColumnRangeInfo (org.apache.carbondata.core.metadata.schema.SortColumnRangeInfo): 2, CarbonRow (org.apache.carbondata.core.datastore.row.CarbonRow): 1, CarbonDataLoadingException (org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException): 1, RangePartitionerImpl (org.apache.carbondata.processing.loading.partition.impl.RangePartitionerImpl): 1, RawRowComparator (org.apache.carbondata.processing.loading.partition.impl.RawRowComparator): 1, SortScopeOptions (org.apache.carbondata.processing.loading.sort.SortScopeOptions): 1