Search in sources :

Example 1 with BucketingInfo

Use of org.apache.carbondata.core.metadata.schema.BucketingInfo in the Apache CarbonData project.

From the class DataConverterProcessorWithBucketingStepImpl, method initialize.

@Override
public void initialize() throws IOException {
    // Initialize the upstream step first so its output fields are available.
    child.initialize();
    converters = new ArrayList<>();
    badRecordLogger = createBadRecordLogger();
    RowConverter converter = new RowConverterImpl(child.getOutput(), configuration, badRecordLogger);
    configuration.setCardinalityFinder(converter);
    converters.add(converter);
    converter.initialize();

    // Resolve bucket columns: record the index and schema of every input field
    // whose name matches one of the configured bucket columns.
    List<Integer> indexes = new ArrayList<>();
    List<ColumnSchema> columnSchemas = new ArrayList<>();
    DataField[] inputDataFields = getOutput();
    BucketingInfo bucketingInfo = configuration.getBucketingInfo();
    // Hoist the bucket-column list and each field name out of the inner loop;
    // the original re-fetched both on every inner iteration.
    List<ColumnSchema> bucketColumns = bucketingInfo.getListOfColumns();
    for (int i = 0; i < inputDataFields.length; i++) {
        String fieldName = inputDataFields[i].getColumn().getColName();
        for (ColumnSchema bucketColumn : bucketColumns) {
            if (fieldName.equals(bucketColumn.getColumnName())) {
                indexes.add(i);
                columnSchemas.add(inputDataFields[i].getColumn().getColumnSchema());
                break;
            }
        }
    }
    // Hash partitioner dispatches each row to a bucket based on these columns.
    partitioner = new HashPartitionerImpl(indexes, columnSchemas, bucketingInfo.getNumberOfBuckets());
}
Also used : RowConverterImpl(org.apache.carbondata.processing.newflow.converter.impl.RowConverterImpl) HashPartitionerImpl(org.apache.carbondata.processing.newflow.partition.impl.HashPartitionerImpl) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) RowConverter(org.apache.carbondata.processing.newflow.converter.RowConverter) BucketingInfo(org.apache.carbondata.core.metadata.schema.BucketingInfo) DataField(org.apache.carbondata.processing.newflow.DataField)

Example 2 with BucketingInfo

Use of org.apache.carbondata.core.metadata.schema.BucketingInfo in the Apache CarbonData project.

From the class TableSchema, method readFields.

@Override
public void readFields(DataInput in) throws IOException {
    // Deserialize in the exact order produced by the matching write():
    // ids, column list, property map, then two optional trailing sections.
    tableId = in.readUTF();
    tableName = in.readUTF();

    int columnCount = in.readInt();
    listOfColumns = new ArrayList<>(columnCount);
    while (columnCount-- > 0) {
        ColumnSchema column = new ColumnSchema();
        column.readFields(in);
        listOfColumns.add(column);
    }

    int propertyCount = in.readInt();
    tableProperties = new HashMap<String, String>(propertyCount);
    while (propertyCount-- > 0) {
        // Each property is a key/value pair of UTF strings; Java evaluates
        // arguments left-to-right, so the key is read before the value.
        tableProperties.put(in.readUTF(), in.readUTF());
    }

    // A boolean flag precedes each optional section and marks its presence.
    if (in.readBoolean()) {
        partitionInfo = new PartitionInfo();
        partitionInfo.readFields(in);
    }
    if (in.readBoolean()) {
        bucketingInfo = new BucketingInfo();
        bucketingInfo.readFields(in);
    }
}
Also used : BucketingInfo(org.apache.carbondata.core.metadata.schema.BucketingInfo) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo)

Example 3 with BucketingInfo

Use of org.apache.carbondata.core.metadata.schema.BucketingInfo in the Apache CarbonData project.

From the class DataConverterProcessorStepImpl, method initializeBucketColumnPartitioner.

/**
 * Initializes the hash partitioner used to dispatch rows by bucket column.
 * Matches each input data field against the configured bucket columns and
 * records the field's index and schema for every match.
 */
private void initializeBucketColumnPartitioner() {
    List<Integer> indexes = new ArrayList<>();
    List<ColumnSchema> columnSchemas = new ArrayList<>();
    DataField[] inputDataFields = getOutput();
    BucketingInfo bucketingInfo = configuration.getBucketingInfo();
    // Hoist the bucket-column list and each field name out of the inner loop;
    // the original re-fetched both on every inner iteration.
    List<ColumnSchema> bucketColumns = bucketingInfo.getListOfColumns();
    for (int i = 0; i < inputDataFields.length; i++) {
        String fieldName = inputDataFields[i].getColumn().getColName();
        for (ColumnSchema bucketColumn : bucketColumns) {
            if (fieldName.equals(bucketColumn.getColumnName())) {
                indexes.add(i);
                columnSchemas.add(inputDataFields[i].getColumn().getColumnSchema());
                break;
            }
        }
    }
    // hash partitioner to dispatch rows by bucket column
    // NOTE(review): this API generation uses getNumOfRanges() for the bucket
    // count (the older newflow variant used getNumberOfBuckets()); kept as-is.
    this.partitioner = new HashPartitionerImpl(indexes, columnSchemas, bucketingInfo.getNumOfRanges());
}
Also used : BucketingInfo(org.apache.carbondata.core.metadata.schema.BucketingInfo) DataField(org.apache.carbondata.processing.loading.DataField) HashPartitionerImpl(org.apache.carbondata.processing.loading.partition.impl.HashPartitionerImpl) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)

Aggregations

BucketingInfo (org.apache.carbondata.core.metadata.schema.BucketingInfo)3 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)3 ArrayList (java.util.ArrayList)2 PartitionInfo (org.apache.carbondata.core.metadata.schema.PartitionInfo)1 DataField (org.apache.carbondata.processing.loading.DataField)1 HashPartitionerImpl (org.apache.carbondata.processing.loading.partition.impl.HashPartitionerImpl)1 DataField (org.apache.carbondata.processing.newflow.DataField)1 RowConverter (org.apache.carbondata.processing.newflow.converter.RowConverter)1 RowConverterImpl (org.apache.carbondata.processing.newflow.converter.impl.RowConverterImpl)1 HashPartitionerImpl (org.apache.carbondata.processing.newflow.partition.impl.HashPartitionerImpl)1