
Example 6 with TableBlockInfo

Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

Class CarbondataRecordSet, method cursor:

/**
   * Get data blocks for this split via the CarbonData QueryModel API.
   */
@Override
public RecordCursor cursor() {
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<TableBlockInfo>();
    tableBlockInfoList.add(new TableBlockInfo(
        split.getLocalInputSplit().getPath().toString(),
        split.getLocalInputSplit().getStart(),
        split.getLocalInputSplit().getSegmentId(),
        split.getLocalInputSplit().getLocations().toArray(new String[0]),
        split.getLocalInputSplit().getLength(),
        //blockletInfos,
        ColumnarFormatVersion.valueOf(split.getLocalInputSplit().getVersion())));
    queryModel.setTableBlockInfos(tableBlockInfoList);
    queryExecutor = QueryExecutorFactory.getQueryExecutor(queryModel);
    //queryModel.setQueryId(queryModel.getQueryId() + "_" + split.getLocalInputSplit().getSegmentId());
    try {
        readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getAbsoluteTableIdentifier());
        CarbonIterator<Object[]> carbonIterator = new ChunkRowIterator((CarbonIterator<BatchResult>) queryExecutor.execute(queryModel));
        return new CarbondataRecordCursor(readSupport, carbonIterator, columns, split);
    } catch (QueryExecutionException e) {
        throw new RuntimeException(e.getMessage(), e);
    } catch (Exception ex) {
        throw new RuntimeException(ex.getMessage(), ex);
    }
}
Also used: TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo) ChunkRowIterator (org.apache.carbondata.core.scan.result.iterator.ChunkRowIterator) QueryExecutionException (org.apache.carbondata.core.scan.executor.exception.QueryExecutionException) ArrayList (java.util.ArrayList) BatchResult (org.apache.carbondata.core.scan.result.BatchResult)
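Design note: execute() returns results in batches (BatchResult), and ChunkRowIterator flattens those batches so the cursor can consume one Object[] row at a time.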

Example 7 with TableBlockInfo

Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

Class CarbonHiveRecordReader, method initialize:

public void initialize(InputSplit inputSplit, Configuration conf) throws IOException {
    // The input split can contain a single HDFS block or multiple blocks, so first collect all
    // the blocks and then set them in the query model.
    List<CarbonHiveInputSplit> splitList;
    if (inputSplit instanceof CarbonHiveInputSplit) {
        splitList = new ArrayList<>(1);
        splitList.add((CarbonHiveInputSplit) inputSplit);
    } else {
        throw new RuntimeException("unsupported input split type: " + inputSplit);
    }
    List<TableBlockInfo> tableBlockInfoList = CarbonHiveInputSplit.createBlocks(splitList);
    queryModel.setTableBlockInfos(tableBlockInfoList);
    readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getAbsoluteTableIdentifier());
    try {
        carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
    } catch (QueryExecutionException e) {
        // Preserve the full exception chain rather than only its cause.
        throw new IOException(e.getMessage(), e);
    }
    if (valueObj == null) {
        valueObj = new ArrayWritable(Writable.class, new Writable[queryModel.getProjectionColumns().length]);
    }
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    List<TypeInfo> columnTypes;
    // Get column names and sort order
    // Use empty-string defaults so a missing property does not cause an NPE below.
    final String colIds = conf.get("hive.io.file.readcolumn.ids", "");
    final String columnNameProperty = conf.get("hive.io.file.readcolumn.names", "");
    final String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES, "");
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    String[] arraySelectedColId = colIds.split(",");
    List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
    for (String anArrayColId : arraySelectedColId) {
        reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, reqColTypes);
    this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
}
Also used: TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo) ChunkRowIterator (org.apache.carbondata.core.scan.result.iterator.ChunkRowIterator) ArrayList (java.util.ArrayList) DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable) Writable (org.apache.hadoop.io.Writable) LongWritable (org.apache.hadoop.io.LongWritable) TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable) ArrayWritable (org.apache.hadoop.io.ArrayWritable) IntWritable (org.apache.hadoop.io.IntWritable) HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) IOException (java.io.IOException) TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) QueryExecutionException (org.apache.carbondata.core.scan.executor.exception.QueryExecutionException)
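Note the two-step column resolution here: the full column name and type lists come from the table-level properties, while hive.io.file.readcolumn.ids narrows the types down to the projected columns, so the resulting StructTypeInfo (and the ObjectInspector built from it) describes only what the query actually reads.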

Example 8 with TableBlockInfo

Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

Class InMemoryBTreeIndex, method filter:

@Override
public List<Block> filter(JobContext job, FilterResolverIntf filter) throws IOException {
    List<Block> result = new LinkedList<>();
    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier identifier = null;
    // for this segment, fetch blocks matching the filter in the BTree
    List<DataRefNode> dataRefNodes = getDataBlocksOfSegment(job, filterExpressionProcessor, identifier, filter);
    for (DataRefNode dataRefNode : dataRefNodes) {
        BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
        TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
        result.add(new CarbonInputSplit(segment.getId(),
            new Path(tableBlockInfo.getFilePath()),
            tableBlockInfo.getBlockOffset(),
            tableBlockInfo.getBlockLength(),
            tableBlockInfo.getLocations(),
            tableBlockInfo.getBlockletInfos().getNoOfBlockLets(),
            tableBlockInfo.getVersion()));
    }
    }
    return result;
}
Also used: Path (org.apache.hadoop.fs.Path) TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo) FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Block (org.apache.carbondata.hadoop.internal.index.Block) DataRefNode (org.apache.carbondata.core.datastore.DataRefNode) CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit) LinkedList (java.util.LinkedList) BlockBTreeLeafNode (org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)
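Since the method returns List&lt;Block&gt; and adds CarbonInputSplit instances to it, the split type itself serves as the index's Block abstraction: only the BTree leaf nodes that survive the filter resolver ever become input splits, so pruning happens at the index level before any data is read.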

Example 9 with TableBlockInfo

Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

Class InMemoryBTreeIndex, method getSegmentAbstractIndexs:

private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(JobContext job, AbsoluteTableIdentifier identifier) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
    TableSegmentUniqueIdentifier segmentUniqueIdentifier = new TableSegmentUniqueIdentifier(identifier, segment.getId());
    try {
        SegmentTaskIndexWrapper segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().getIfPresent(segmentUniqueIdentifier);
        if (null != segmentTaskIndexWrapper) {
            segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        }
        // if segment tree is not loaded, load the segment tree
        if (segmentIndexMap == null) {
            List<TableBlockInfo> tableBlockInfoList = getTableBlockInfo(job);
            Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
            segmentToTableBlocksInfos.put(segment.getId(), tableBlockInfoList);
            segmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
            // TODO: loadAndGetTaskIdToSegmentsMap can be optimized, use tableBlockInfoList as input
            // get Btree blocks for given segment
            segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().get(segmentUniqueIdentifier);
            segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        }
    } finally {
        cacheClient.close();
    }
    return segmentIndexMap;
}
Also used: CacheClient (org.apache.carbondata.hadoop.CacheClient) TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo) SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) HashMap (java.util.HashMap) AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex) ArrayList (java.util.ArrayList) LinkedList (java.util.LinkedList) List (java.util.List) TableSegmentUniqueIdentifier (org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier)
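The lookup is cache-first: getIfPresent() avoids rebuilding the BTree when the segment index is already cached, and only on a miss are the TableBlockInfo lists gathered and the index loaded via get(). The finally block ensures the CacheClient is closed on either path.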

Example 10 with TableBlockInfo

Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

Class CarbonLoaderUtilTest, method initSet2:

void initSet2() {
    blockInfos = new ArrayList<>();
    activeNode = new ArrayList<>();
    activeNode.add("node-7");
    activeNode.add("node-9");
    activeNode.add("node-11");
    String[] location = { "node-7", "node-11" };
    blockInfos.add(new TableBlockInfo("node", 1, "1", location, 0));
    blockInfos.add(new TableBlockInfo("node", 2, "1", location, 0));
    blockInfos.add(new TableBlockInfo("node", 3, "1", location, 0));
    blockInfos.add(new TableBlockInfo("node", 4, "1", location, 0));
    blockInfos.add(new TableBlockInfo("node", 5, "1", location, 0));
    blockInfos.add(new TableBlockInfo("node", 6, "1", location, 0));
    // Each of the three active nodes is expected to receive two of the six blocks.
    expected = new HashMap<>();
    expected.put("node-7", blockInfos.subList(0, 2));
    expected.put("node-9", blockInfos.subList(2, 4));
    expected.put("node-11", blockInfos.subList(4, 6));
}
Also used: TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)
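The fixture above only prepares inputs and expectations; the test that consumes it is not part of the listing. Below is a minimal sketch of how such a fixture is typically exercised, assuming a CarbonLoaderUtil.nodeBlockMapping(blockInfos, numOfNodes, activeNode) helper that returns a node-to-blocks map; the method name, the -1 node-count convention, and the static assertEquals import are assumptions, not taken from this listing.

@Test
public void testNodeBlockMappingAcrossActiveNodes() {
    initSet2();
    // Assumed helper: maps each active node to the blocks it should load; -1 is taken
    // here to mean "derive the node count from the active-node list" (assumption).
    Map<String, List<TableBlockInfo>> actual =
        CarbonLoaderUtil.nodeBlockMapping(blockInfos, -1, activeNode);
    // Each of the three active nodes should receive two of the six blocks.
    for (String node : activeNode) {
        assertEquals(2, actual.get(node).size());
    }
}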

Aggregations

TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 33 usages
ArrayList (java.util.ArrayList): 19 usages
Test (org.junit.Test): 11 usages
HashMap (java.util.HashMap): 10 usages
List (java.util.List): 9 usages
LinkedList (java.util.LinkedList): 7 usages
AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex): 7 usages
DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter): 7 usages
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 6 usages
LinkedHashMap (java.util.LinkedHashMap): 5 usages
IOException (java.io.IOException): 4 usages
SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper): 4 usages
CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier): 4 usages
File (java.io.File): 3 usages
Map (java.util.Map): 3 usages
MockUp (mockit.MockUp): 3 usages
BlockInfo (org.apache.carbondata.core.datastore.block.BlockInfo): 3 usages
TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier): 3 usages
SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo): 3 usages
QueryExecutionException (org.apache.carbondata.core.scan.executor.exception.QueryExecutionException): 3 usages
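For quick reference, here is a minimal sketch of constructing a TableBlockInfo directly, using only the two constructor shapes that appear in the examples above; the file path, sizes, and the ColumnarFormatVersion.V1 constant are placeholder assumptions.

String[] locations = { "node-1" };
// Five-argument form, as in CarbonLoaderUtilTest: file path, block offset, segment id, locations, block length.
TableBlockInfo block = new TableBlockInfo("/store/default/t1/part-0", 0, "0", locations, 1024);
// Six-argument form, as in CarbondataRecordSet, additionally carries the columnar format version.
TableBlockInfo versioned = new TableBlockInfo("/store/default/t1/part-0", 0, "0", locations, 1024, ColumnarFormatVersion.V1);
// The list is then handed to the query model, as in Examples 6 and 7:
// queryModel.setTableBlockInfos(Arrays.asList(versioned));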