Search in sources:

Example 21 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

From the class SegmentTaskIndexStoreTest, method setUp:

/**
 * One-time fixture setup: creates the DRIVER_BTREE cache under test plus the
 * block info and table identifier fixtures stored in the static fields.
 */
@BeforeClass
public static void setUp() {
    // Cache instance is obtained through CacheProvider.getInstance() and narrowed to the store type.
    CacheProvider provider = CacheProvider.getInstance();
    taskIndexStore = (SegmentTaskIndexStore) provider.<TableSegmentUniqueIdentifier, SegmentTaskIndexWrapper>createCache(CacheType.DRIVER_BTREE, "");

    // Block fixture; the format version is parsed from the class-level "version" value.
    ColumnarFormatVersion formatVersion = ColumnarFormatVersion.valueOf(version);
    tableBlockInfo = new TableBlockInfo("file", 0L, "SG100", locations, 10L, formatVersion);

    // Table identifier fixture rooted at "/tmp".
    CarbonTableIdentifier tableIdentifier = new CarbonTableIdentifier("testdatabase", "testtable", "TB100");
    absoluteTableIdentifier = new AbsoluteTableIdentifier("/tmp", tableIdentifier);
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) CacheProvider(org.apache.carbondata.core.cache.CacheProvider) BeforeClass(org.junit.BeforeClass)

Example 22 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

From the class AbsoluteTableIdentifierTest, method setup:

/**
 * Builds the five identifier fixtures used by the tests. The first and the last
 * are created from equal arguments but from separate CarbonTableIdentifier
 * instances; the second and third carry a null table identifier.
 */
@BeforeClass
public static void setup() {
    CarbonTableIdentifier namedTable = new CarbonTableIdentifier("databaseName", "tableName", "tableId");
    absoluteTableIdentifier = new AbsoluteTableIdentifier("storePath", namedTable);

    // Fixtures without a table identifier.
    absoluteTableIdentifier1 = new AbsoluteTableIdentifier("dummy", null);
    absoluteTableIdentifier2 = new AbsoluteTableIdentifier("dumgfhmy", null);

    CarbonTableIdentifier dummyTable = new CarbonTableIdentifier("dummy", "dumy", "dmy");
    absoluteTableIdentifier3 = new AbsoluteTableIdentifier("duhgmmy", dummyTable);

    // Deliberately a fresh instance with the same values as namedTable, not a shared reference.
    CarbonTableIdentifier namedTableCopy = new CarbonTableIdentifier("databaseName", "tableName", "tableId");
    absoluteTableIdentifier4 = new AbsoluteTableIdentifier("storePath", namedTableCopy);
}
Also used : CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) BeforeClass(org.junit.BeforeClass)

Example 23 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

From the class CarbonInputFormat, method getSplits:

/**
 * Builds the input splits for a job: for every segment returned by
 * getSegmentsToAccess(job), the blocks returned by getDataBlocksOfSegment are
 * turned into one CarbonInputSplit each, skipping any block that
 * CarbonUtil.isInvalidTableBlock reports as invalid.
 *
 * The table is resolved from the job configuration; the original documentation
 * names FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
 * as the configurations used to locate the table path to read.
 *
 * @param job               job whose configuration identifies the table and segments
 * @param filterResolver    resolved filter, forwarded to getDataBlocksOfSegment
 * @param matchedPartitions partition bit set, forwarded to getDataBlocksOfSegment
 * @param cacheClient       cache client, forwarded to getDataBlocksOfSegment
 * @return the splits in segment order, then in the order the blocks were returned
 * @throws IOException propagated from the helper calls made here
 */
private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver, BitSet matchedPartitions, CacheClient cacheClient) throws IOException {
    List<InputSplit> splits = new LinkedList<InputSplit>();
    FilterExpressionProcessor filterProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier tableIdentifier = getCarbonTable(job.getConfiguration()).getAbsoluteTableIdentifier();
    SegmentUpdateStatusManager updateManager = new SegmentUpdateStatusManager(tableIdentifier);

    // For each segment, fetch the blocks matching the filter from the driver B-tree.
    for (String segmentId : getSegmentsToAccess(job)) {
        List<DataRefNode> segmentBlocks = getDataBlocksOfSegment(job, filterProcessor, tableIdentifier, filterResolver, matchedPartitions, segmentId, cacheClient, updateManager);
        for (DataRefNode node : segmentBlocks) {
            TableBlockInfo blockInfo = ((BlockBTreeLeafNode) node).getTableBlockInfo();
            boolean invalid = CarbonUtil.isInvalidTableBlock(blockInfo, updateManager.getInvalidTimestampRange(blockInfo.getSegmentId()), updateManager);
            if (!invalid) {
                Path blockPath = new Path(blockInfo.getFilePath());
                splits.add(new CarbonInputSplit(segmentId, blockPath, blockInfo.getBlockOffset(), blockInfo.getBlockLength(), blockInfo.getLocations(), blockInfo.getBlockletInfos().getNoOfBlockLets(), blockInfo.getVersion()));
            }
        }
    }
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) InputSplit(org.apache.hadoop.mapreduce.InputSplit) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)

Example 24 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

From the class CarbonInputFormat, method getFileStatus:

/**
 * Collects file statuses for every valid partition / segment combination into
 * {@code result} by delegating to getFileStatusInternal.
 *
 * When {@code filesToConsider} is empty the segment's data directory itself is
 * passed to getFileStatusInternal; otherwise each listed file inside that
 * directory is passed individually.
 *
 * @param job                job supplying the configuration and credentials
 * @param segmentsToConsider segment ids to visit under each valid partition
 * @param filesToConsider    file names inside a segment directory; may be empty
 * @param result             output list handed to getFileStatusInternal
 * @throws IOException if getValidPartitions returns no partitions, or propagated
 *                     from the file system calls made here
 */
private void getFileStatus(JobContext job, String[] segmentsToConsider, String[] filesToConsider, List<FileStatus> result) throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }
    PathFilter inputFilter = getDataFileFilter();
    AbsoluteTableIdentifier absIdentifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CarbonTablePath tablePath = getTablePath(absIdentifier);
    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration());
    // get all data files of valid partitions and segments
    for (String partition : partitionsToConsider) {
        for (String segmentId : segmentsToConsider) {
            // Resolved once per segment instead of once per file.
            String segmentDirectory = tablePath.getCarbonDataDirectoryPath(partition, segmentId);
            if (filesToConsider.length == 0) {
                Path segmentPath = new Path(absIdentifier.appendWithLocalPrefix(segmentDirectory));
                FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());
                getFileStatusInternal(inputFilter, fs, segmentPath, result);
            } else {
                for (String fileName : filesToConsider) {
                    // Hadoop paths always use "/" (Path.SEPARATOR); File.separator is the
                    // local OS separator and yields "\" on Windows clients.
                    String dataPath = absIdentifier.appendWithLocalPrefix(segmentDirectory + Path.SEPARATOR + fileName);
                    Path filePath = new Path(dataPath);
                    FileSystem fs = filePath.getFileSystem(job.getConfiguration());
                    getFileStatusInternal(inputFilter, fs, filePath, result);
                }
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) PathFilter(org.apache.hadoop.fs.PathFilter) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) FileSystem(org.apache.hadoop.fs.FileSystem) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) IOException(java.io.IOException)

Example 25 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

From the class MapredCarbonInputFormat, method getQueryModel:

/**
 * Builds the QueryModel for the table described by the given configuration:
 * a query plan over the projected columns plus the resolved filter tree.
 *
 * @param configuration configuration from which the table, the column
 *                      projection and the filter predicates are read
 * @return the query model with its filter expression resolver tree set
 * @throws IOException propagated from the helper calls made here
 */
public QueryModel getQueryModel(Configuration configuration) throws IOException {
    CarbonTable table = getCarbonTable(configuration);
    // Take the identifier from the already-loaded table; per the original note
    // this avoids an unnecessary deserialization.
    AbsoluteTableIdentifier tableIdentifier = table.getAbsoluteTableIdentifier();

    // Projection columns: use the configured projection, otherwise fall back
    // to the "hive.io.file.readcolumn.names" configuration entry.
    String configuredProjection = getColumnProjection(configuration);
    String projection = (configuredProjection != null) ? configuredProjection : configuration.get("hive.io.file.readcolumn.names");
    QueryModel model = QueryModel.createModel(tableIdentifier, CarbonInputFormatUtil.createQueryPlan(table, projection), table);

    // Attach the filter so blocklets can be filtered before the scan.
    Expression filter = getFilterPredicates(configuration);
    CarbonInputFormatUtil.processFilterExpression(filter, table);
    model.setFilterExpressionResolverTree(CarbonInputFormatUtil.resolveFilter(filter, tableIdentifier));
    return model;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonQueryPlan(org.apache.carbondata.core.scan.model.CarbonQueryPlan) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) QueryModel(org.apache.carbondata.core.scan.model.QueryModel) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Aggregations

AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)26 IOException (java.io.IOException)13 ArrayList (java.util.ArrayList)11 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)10 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)8 ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock)7 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)7 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)6 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)6 File (java.io.File)5 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)5 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)5 FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor)4 FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)4 Path (org.apache.hadoop.fs.Path)4 Test (org.junit.Test)4 AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex)3 TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier)3 BlockBTreeLeafNode (org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)3 UpdateVO (org.apache.carbondata.core.mutate.UpdateVO)3