Search in sources:

Example 1 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class IncrementalColumnDictionaryGenerator method writeDictionaryData.

/**
 * Writes the dictionary data for this generator's dimension: the dictionary itself, then the
 * sort index when new distinct values were produced, and finally calls
 * {@code updateMetaData}. The time spent in each phase is reported through the audit log.
 *
 * @param tableUniqueName unique table name used to look the table up in the carbon metadata
 * @throws IOException declared for the dictionary read/write steps invoked here
 */
@Override
public void writeDictionaryData(String tableUniqueName) throws IOException {
    // resolve the table, the column and the store location to write to
    CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(tableUniqueName);
    CarbonTableIdentifier tableId = table.getCarbonTableIdentifier();
    ColumnIdentifier columnId = dimension.getColumnIdentifier();
    String storeLocation = table.getStorePath();
    DictionaryService service = CarbonCommonFactory.getDictionaryService();

    // phase 1: load the existing dictionary through the reverse-dictionary cache,
    // but only when a dictionary file is already present for this column
    DictionaryColumnUniqueIdentifier dictKey =
        new DictionaryColumnUniqueIdentifier(tableId, columnId, columnId.getDataType());
    Boolean dictFilePresent = CarbonUtil.isFileExistsForGivenColumn(storeLocation, dictKey);
    Dictionary existingDictionary = null;
    long cacheStart = System.currentTimeMillis();
    if (dictFilePresent) {
        Cache<DictionaryColumnUniqueIdentifier, Dictionary> reverseCache =
            CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, storeLocation);
        existingDictionary = reverseCache.get(dictKey);
    }
    long cacheMillis = System.currentTimeMillis() - cacheStart;

    // phase 2: write the dictionary and collect the values that were written
    long writeStart = System.currentTimeMillis();
    CarbonDictionaryWriter writer = service.getDictionaryWriter(tableId, columnId, storeLocation);
    List<String> newValues = writeDictionary(writer, dictFilePresent);
    long writeMillis = System.currentTimeMillis() - writeStart;

    // phase 3: the sort index is only written when there is something new to index
    long sortStart = System.currentTimeMillis();
    if (!newValues.isEmpty()) {
        writeSortIndex(newValues, existingDictionary, service, tableId, columnId, storeLocation);
    }
    long sortMillis = System.currentTimeMillis() - sortStart;

    // phase 4: update the metadata, then report what was done and how long it took
    updateMetaData(writer);
    LOGGER.audit("\n columnName: " + dimension.getColName()
        + "\n columnId: " + dimension.getColumnId()
        + "\n new distinct values count: " + newValues.size()
        + "\n create dictionary cache: " + cacheMillis
        + "\n sort list, distinct and write: " + writeMillis
        + "\n write sort info: " + sortMillis);
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) BiDictionary(org.apache.carbondata.core.devapi.BiDictionary) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) DictionaryService(org.apache.carbondata.core.service.DictionaryService) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonMetadata(org.apache.carbondata.core.metadata.CarbonMetadata) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter)

Example 2 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class ServerDictionaryGenerator method initializeGeneratorForTable.

/**
 * Makes sure the table named in {@code key} has a {@code TableDictionaryGenerator} that
 * knows about the column named in {@code key}: an existing generator is updated with the
 * dimension, otherwise a new generator is created for it.
 *
 * @param key message supplying the table unique name and the column name
 */
public void initializeGeneratorForTable(DictionaryMessage key) {
    String tableUniqueName = key.getTableUniqueName();
    CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(tableUniqueName);
    CarbonDimension dimension =
        table.getPrimitiveDimensionByName(tableUniqueName, key.getColumnName());

    // a generator already exists for this table: just hand it the dimension
    if (tableMap.get(tableUniqueName) != null) {
        tableMap.get(tableUniqueName).updateGenerator(dimension);
        return;
    }
    // first column seen for this table: create its generator
    tableMap.put(tableUniqueName, new TableDictionaryGenerator(dimension));
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonMetadata(org.apache.carbondata.core.metadata.CarbonMetadata) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 3 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class QueryUtil method resolveQueryModel.

/**
 * Resolves the query model by attaching the actual dimension and measure objects to its
 * query dimensions and query measures. Only column names are passed from the driver, to
 * avoid serializing the heavy column objects, so they are looked up here by name.
 *
 * @param queryModel query model whose dimensions and measures are resolved in place
 */
public static void resolveQueryModel(QueryModel queryModel) {
    CarbonMetadata.getInstance().addCarbonTable(queryModel.getTable());
    // TODO need to load the table from table identifier
    CarbonTable table = queryModel.getTable();
    String tblName =
        queryModel.getAbsoluteTableIdentifier().getCarbonTableIdentifier().getTableName();

    // attach the dimension object to every query dimension
    for (QueryDimension dimensionRef : queryModel.getQueryDimension()) {
        dimensionRef.setDimension(table.getDimensionByName(tblName, dimensionRef.getColumnName()));
    }

    // attach the measure object to every query measure
    for (QueryMeasure measureRef : queryModel.getQueryMeasures()) {
        if (!measureRef.getColumnName().equals("count(*)")) {
            // ordinary measure: look it up by column name
            measureRef.setMeasure(table.getMeasureByName(tblName, measureRef.getColumnName()));
            continue;
        }
        // count(*): use the table's first measure unless there is none or it is the
        // invisible dummy measure; the emptiness check is kept deliberately even though
        // the original author expected it never to fail
        List<CarbonMeasure> tableMeasures = table.getMeasureByTableName(tblName);
        boolean firstMeasureUsable = tableMeasures.size() > 0
            && !tableMeasures.get(0).getColName()
                .equals(CarbonCommonConstants.DEFAULT_INVISIBLE_DUMMY_MEASURE);
        if (firstMeasureUsable) {
            measureRef.setMeasure(tableMeasures.get(0));
        } else {
            // otherwise build a stand-in measure from the first dimension's column schema
            measureRef.setMeasure(
                new CarbonMeasure(table.getDimensionByTableName(tblName).get(0).getColumnSchema(), 0));
        }
    }
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) QueryMeasure(org.apache.carbondata.core.scan.model.QueryMeasure) QueryDimension(org.apache.carbondata.core.scan.model.QueryDimension)

Example 4 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class QueryUtil method getDictionaryColumnUniqueIdentifierList.

/**
 * Builds the dictionary column unique identifiers for the given column ids.
 *
 * @param dictionaryColumnIdList column ids to build identifiers for
 * @param carbonTableIdentifier  identifier of the table the columns belong to; its table
 *                               unique name is used to look the table up in the metadata
 * @return one identifier per column id that resolves to a dimension, in input order;
 *         column ids with no matching dimension are skipped
 */
private static List<DictionaryColumnUniqueIdentifier> getDictionaryColumnUniqueIdentifierList(List<String> dictionaryColumnIdList, CarbonTableIdentifier carbonTableIdentifier) {
    CarbonMetadata metadata = CarbonMetadata.getInstance();
    CarbonTable table = metadata.getCarbonTable(carbonTableIdentifier.getTableUniqueName());
    List<DictionaryColumnUniqueIdentifier> result = new ArrayList<>(dictionaryColumnIdList.size());
    for (String colId : dictionaryColumnIdList) {
        CarbonDimension dim = metadata.getCarbonDimensionBasedOnColIdentifier(table, colId);
        if (dim == null) {
            // no dimension for this column id: nothing to add
            continue;
        }
        result.add(new DictionaryColumnUniqueIdentifier(
            carbonTableIdentifier, dim.getColumnIdentifier(), dim.getDataType()));
    }
    return result;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 5 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonInputFormat method getSplits.

/**
 * {@inheritDoc}
 * The table path to read is taken from the job configuration
 * (FileInputFormat.INPUT_DIR).
 *
 * @param job job context whose configuration supplies the table, segments and filter
 * @return list of {@code CarbonInputSplit}; empty when the segments had to be looked up and
 *         none is valid, or when the filter matches no partition
 * @throws IOException if the table's schema file is missing or corrupt
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
    try {
        List<String> staleSegmentIds = new ArrayList<>();
        List<UpdateVO> staleTimestampRanges = new ArrayList<>();

        // no segments configured on the job: look up the valid ones and set them
        if (getSegmentsToAccess(job).length == 0) {
            SegmentStatusManager.ValidAndInvalidSegmentsInfo segmentInfo =
                new SegmentStatusManager(identifier).getValidAndInvalidSegments();
            SegmentUpdateStatusManager updateManager = new SegmentUpdateStatusManager(identifier);
            setSegmentsToAccess(job.getConfiguration(), segmentInfo.getValidSegments());
            if (segmentInfo.getValidSegments().isEmpty()) {
                return new ArrayList<>(0);
            }
            // remember the invalid segments and their invalid timestamp ranges
            staleSegmentIds.addAll(segmentInfo.getInvalidSegments());
            for (String staleId : staleSegmentIds) {
                staleTimestampRanges.add(updateManager.getInvalidTimestampRange(staleId));
            }
            // remove the invalid segments' entries from the segment index cache
            if (!staleSegmentIds.isEmpty()) {
                List<TableSegmentUniqueIdentifier> staleKeys =
                    new ArrayList<>(staleSegmentIds.size());
                for (String staleId : staleSegmentIds) {
                    staleKeys.add(new TableSegmentUniqueIdentifier(identifier, staleId));
                }
                cacheClient.getSegmentAccessClient().invalidateAll(staleKeys);
            }
        }

        // process and resolve the filter expression against the table
        Expression filter = getFilterPredicates(job.getConfiguration());
        CarbonTable table = getCarbonTable(job.getConfiguration());
        if (table == null) {
            // the table is null when the schema file is corrupt
            throw new IOException("Missing/Corrupt schema file for table.");
        }
        CarbonInputFormatUtil.processFilterExpression(filter, table);

        // for a filter query on a partitioned table, work out which partitions are needed
        BitSet requiredPartitions = null;
        if (filter != null) {
            PartitionInfo partitionInfo = table.getPartitionInfo(table.getFactTableName());
            if (partitionInfo != null) {
                Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
                requiredPartitions = new FilterExpressionProcessor()
                    .getFilteredPartitions(filter, partitionInfo, partitioner);
                if (requiredPartitions.cardinality() == 0) {
                    // the filter matches no partition at all
                    return new ArrayList<>();
                }
                if (requiredPartitions.cardinality() == partitioner.numPartitions()) {
                    // every partition is needed, so there is nothing to prune
                    requiredPartitions = null;
                }
            }
        }

        // do block filtering and build the splits
        FilterResolverIntf resolvedFilter = CarbonInputFormatUtil.resolveFilter(filter, identifier);
        List<InputSplit> result = getSplits(job, resolvedFilter, requiredPartitions, cacheClient);

        // hand the invalid segments to the task side so it can drop their index entries too
        if (!staleSegmentIds.isEmpty()) {
            for (InputSplit split : result) {
                CarbonInputSplit carbonSplit = (CarbonInputSplit) split;
                carbonSplit.setInvalidSegments(staleSegmentIds);
                carbonSplit.setInvalidTimestampRange(staleTimestampRanges);
            }
        }
        return result;
    } finally {
        // close the cache client to clear LRU cache memory
        cacheClient.close();
    }
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Partitioner(org.apache.carbondata.core.scan.partition.Partitioner) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Aggregations

CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)55 ArrayList (java.util.ArrayList)18 IOException (java.io.IOException)13 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)13 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)11 Expression (org.apache.carbondata.core.scan.expression.Expression)9 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)8 FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)8 TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo)7 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)7 File (java.io.File)6 Segment (org.apache.carbondata.core.datamap.Segment)6 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)6 SingleTableProvider (org.apache.carbondata.core.scan.filter.SingleTableProvider)6 TableProvider (org.apache.carbondata.core.scan.filter.TableProvider)6 Configuration (org.apache.hadoop.conf.Configuration)6 InputSplit (org.apache.hadoop.mapreduce.InputSplit)6 CarbonMetadata (org.apache.carbondata.core.metadata.CarbonMetadata)5 DataType (org.apache.carbondata.core.metadata.datatype.DataType)5 UpdateVO (org.apache.carbondata.core.mutate.UpdateVO)5