Search in sources :

Example 1 with PartitionInfo

use of org.apache.carbondata.core.metadata.schema.PartitionInfo in project carbondata by apache.

From the class ThriftWrapperSchemaConverterImpl, method fromWrapperToExternalPartitionInfo.

/**
 * Converts a wrapper-side {@code PartitionInfo} into its Thrift (external) form,
 * translating every partition column schema and copying the optional
 * list/range/num-partitions metadata across.
 */
private org.apache.carbondata.format.PartitionInfo fromWrapperToExternalPartitionInfo(PartitionInfo wrapperPartitionInfo) {
    // Translate each wrapper column schema into its Thrift counterpart.
    List<org.apache.carbondata.format.ColumnSchema> partitionColumns =
        new ArrayList<org.apache.carbondata.format.ColumnSchema>();
    for (ColumnSchema column : wrapperPartitionInfo.getColumnSchemaList()) {
        partitionColumns.add(fromWrapperToExternalColumnSchema(column));
    }
    // Build the external object, then copy the remaining partition metadata fields.
    org.apache.carbondata.format.PartitionInfo result =
        new org.apache.carbondata.format.PartitionInfo(
            partitionColumns,
            fromWrapperToExternalPartitionType(wrapperPartitionInfo.getPartitionType()));
    result.setList_info(wrapperPartitionInfo.getListInfo());
    result.setRange_info(wrapperPartitionInfo.getRangeInfo());
    result.setNum_partitions(wrapperPartitionInfo.getNumPartitions());
    return result;
}
Also used : ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo)

Example 2 with PartitionInfo

use of org.apache.carbondata.core.metadata.schema.PartitionInfo in project carbondata by apache.

From the class CarbonInputFormat, method getSplits.

/**
   * {@inheritDoc}
   * Builds the list of input splits for the job. The table path is read from the
   * FileInputFormat.INPUT_DIR configuration. Invalid segments are evicted from the
   * driver-side segment cache and also attached to every split so the task side
   * can drop its stale index entries. For filter queries on a partitioned table,
   * partitions that cannot match the filter are pruned before split computation.
   *
   * @param job job context carrying the Hadoop configuration
   * @return List<InputSplit> list of CarbonInputSplit; empty when there are no
   *         valid segments or when no partition matches the filter
   * @throws IOException if the table schema file is missing or corrupt
   */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    // Cache client must be closed in the finally block below to release LRU memory.
    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
    try {
        List<String> invalidSegments = new ArrayList<>();
        List<UpdateVO> invalidTimestampsList = new ArrayList<>();
        // get all valid segments and set them into the configuration
        // (only when the caller has not already pinned segments to access)
        if (getSegmentsToAccess(job).length == 0) {
            SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
            SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
            SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
            setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
            // nothing to read: short-circuit with an empty split list
            if (segments.getValidSegments().size() == 0) {
                return new ArrayList<>(0);
            }
            // remove entry in the segment index if there are invalid segments
            invalidSegments.addAll(segments.getInvalidSegments());
            for (String invalidSegmentId : invalidSegments) {
                invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
            }
            if (invalidSegments.size() > 0) {
                List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
                for (String segId : invalidSegments) {
                    invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
                }
                // evict the stale segments from the driver-side cache
                cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
            }
        }
        // process and resolve the expression
        Expression filter = getFilterPredicates(job.getConfiguration());
        CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
        // this will be null in case of corrupt schema file.
        if (null == carbonTable) {
            throw new IOException("Missing/Corrupt schema file for table.");
        }
        CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
        // prune partitions for filter query on partition table
        // matchedPartitions == null means "scan all partitions" (no pruning)
        BitSet matchedPartitions = null;
        if (null != filter) {
            PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
            if (null != partitionInfo) {
                Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
                matchedPartitions = new FilterExpressionProcessor().getFilteredPartitions(filter, partitionInfo, partitioner);
                if (matchedPartitions.cardinality() == 0) {
                    // no partition is required
                    return new ArrayList<InputSplit>();
                }
                if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
                    // all partitions are required, no need to prune partitions
                    matchedPartitions = null;
                }
            }
        }
        FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
        // do block filtering and get split
        List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
        // pass the invalid segment to task side in order to remove index entry in task side
        if (invalidSegments.size() > 0) {
            for (InputSplit split : splits) {
                ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
                ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
            }
        }
        return splits;
    } finally {
        // close the cache client to clear LRU cache memory
        cacheClient.close();
    }
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Partitioner(org.apache.carbondata.core.scan.partition.Partitioner) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Example 3 with PartitionInfo

use of org.apache.carbondata.core.metadata.schema.PartitionInfo in project carbondata by apache.

From the class ThriftWrapperSchemaConverterImpl, method fromExternalToWrapperPartitionInfo.

/**
 * Converts a Thrift (external) {@code PartitionInfo} back into the wrapper form,
 * translating every partition column schema and carrying over the optional
 * list/range/num-partitions metadata.
 */
private PartitionInfo fromExternalToWrapperPartitionInfo(org.apache.carbondata.format.PartitionInfo externalPartitionInfo) {
    // Translate each Thrift column schema into its wrapper counterpart.
    List<ColumnSchema> partitionColumns = new ArrayList<ColumnSchema>();
    for (org.apache.carbondata.format.ColumnSchema externalColumn : externalPartitionInfo.getPartition_columns()) {
        partitionColumns.add(fromExternalToWrapperColumnSchema(externalColumn));
    }
    // Build the wrapper object, then copy the remaining partition metadata fields.
    PartitionInfo result = new PartitionInfo(
        partitionColumns,
        fromExternalToWrapperPartitionType(externalPartitionInfo.getPartition_type()));
    result.setListInfo(externalPartitionInfo.getList_info());
    result.setRangeInfo(externalPartitionInfo.getRange_info());
    result.setNumPartitions(externalPartitionInfo.getNum_partitions());
    return result;
}
Also used : ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo)

Aggregations

PartitionInfo (org.apache.carbondata.core.metadata.schema.PartitionInfo)3 ArrayList (java.util.ArrayList)2 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)2 IOException (java.io.IOException)1 TableSegmentUniqueIdentifier (org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier)1 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)1 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)1 UpdateVO (org.apache.carbondata.core.mutate.UpdateVO)1 Expression (org.apache.carbondata.core.scan.expression.Expression)1 FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor)1 FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)1 Partitioner (org.apache.carbondata.core.scan.partition.Partitioner)1 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)1 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)1 InputSplit (org.apache.hadoop.mapreduce.InputSplit)1