Search in sources :

Example 11 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonFactDataHandlerModel method getCarbonDataFolderLocation.

/**
 * Resolves the carbon data directory for the load described by the given configuration.
 *
 * <p>The store root is read from the {@code CarbonCommonConstants.STORE_LOCATION_HDFS}
 * property, and the table is looked up in {@code CarbonMetadata} under the key built from the
 * database name, {@code CarbonCommonConstants.UNDERSCORE} and the table name.
 *
 * @param configuration load configuration supplying the table identifier, partition id and
 *                      segment id
 * @return data directory path for the configured partition and segment
 */
private static String getCarbonDataFolderLocation(CarbonDataLoadConfiguration configuration) {
    final String storeRoot = CarbonProperties.getInstance().getProperty(CarbonCommonConstants.STORE_LOCATION_HDFS);
    final CarbonTableIdentifier loadTableId = configuration.getTableIdentifier().getCarbonTableIdentifier();
    // Metadata entries are keyed by database name and table name joined with UNDERSCORE.
    final String metadataKey = loadTableId.getDatabaseName() + CarbonCommonConstants.UNDERSCORE + loadTableId.getTableName();
    final CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(metadataKey);
    final CarbonTablePath tablePath = CarbonStorePath.getCarbonTablePath(storeRoot, table.getCarbonTableIdentifier());
    return tablePath.getCarbonDataDirectoryPath(
        configuration.getPartitionId(),
        configuration.getSegmentId() + "");
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath)

Example 12 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonFactDataHandlerColumnar method initParameters.

/**
 * Initializes the handler state derived from the given model: the column group model, the
 * dictionary-dimension flags, the number of cores to use, and the producer/consumer executors.
 *
 * <p>The core count is read from {@code CarbonCommonConstants.NUM_CORES_COMPACTING} when the
 * model reports a compaction flow and from {@code CarbonCommonConstants.NUM_CORES_LOADING}
 * otherwise. A single consumer task is submitted immediately so that blocklets/pages are
 * written to file in order.
 *
 * @param model fact data handler model supplying segment properties, database/table name and
 *              the compaction flag
 */
private void initParameters(CarbonFactDataHandlerModel model) {
    this.colGrpModel = model.getSegmentProperties().getColumnGroupModel();
    //TODO need to pass carbon table identifier to metadata
    CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(model.getDatabaseName() + CarbonCommonConstants.UNDERSCORE + model.getTableName());
    isDictDimension = CarbonUtil.identifyDimensionType(carbonTable.getDimensionByTableName(model.getTableName()));
    // Compaction and data loading are tuned through separate core-count properties.
    String coresProperty = model.isCompactionFlow()
        ? CarbonCommonConstants.NUM_CORES_COMPACTING
        : CarbonCommonConstants.NUM_CORES_LOADING;
    numberOfCores = resolveNumberOfCores(coresProperty);
    blockletProcessingCount = new AtomicInteger(0);
    producerExecutorService = Executors.newFixedThreadPool(numberOfCores);
    producerExecutorServiceTaskList = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    LOGGER.info("Initializing writer executors");
    consumerExecutorService = Executors.newFixedThreadPool(1);
    consumerExecutorServiceTaskList = new ArrayList<>(1);
    // One permit per producer thread.
    semaphore = new Semaphore(numberOfCores);
    blockletDataHolder = new BlockletDataHolder();
    // Start the consumer which will take each blocklet/page in order and write to a file
    Consumer consumer = new Consumer(blockletDataHolder);
    consumerExecutorServiceTaskList.add(consumerExecutorService.submit(consumer));
}

/**
 * Reads an integer core count from the given Carbon property, falling back to
 * {@code CarbonCommonConstants.NUM_CORES_DEFAULT_VAL} when the configured value is not a
 * valid integer.
 *
 * @param propertyName name of the property holding the core count
 * @return the configured core count, or the default if the configured value cannot be parsed
 */
private int resolveNumberOfCores(String propertyName) {
    try {
        return Integer.parseInt(CarbonProperties.getInstance().getProperty(propertyName, CarbonCommonConstants.NUM_CORES_DEFAULT_VAL));
    } catch (NumberFormatException exc) {
        LOGGER.error("Configured value for property " + propertyName + " is wrong. Falling back to the default value " + CarbonCommonConstants.NUM_CORES_DEFAULT_VAL);
        return Integer.parseInt(CarbonCommonConstants.NUM_CORES_DEFAULT_VAL);
    }
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Semaphore(java.util.concurrent.Semaphore)

Example 13 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonDataProcessorUtil method getMeasureDataType.

/**
 * Returns the data types of the first {@code measureCount} measures of the given table.
 *
 * <p>The table is looked up in {@code CarbonMetadata} under the key built from the database
 * name, {@code CarbonCommonConstants.UNDERSCORE} and the table name. Every slot of the result
 * is taken from the corresponding measure; there is no default type, so the call fails if the
 * table exposes fewer than {@code measureCount} measures.
 *
 * @param measureCount number of measures whose data type is required
 * @param databaseName database name used to build the metadata lookup key
 * @param tableName    table name used for the metadata lookup and the measure lookup
 * @return array of length {@code measureCount} holding the data type of each measure, in
 *         measure order
 */
public static DataType[] getMeasureDataType(int measureCount, String databaseName, String tableName) {
    DataType[] type = new DataType[measureCount];
    CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName + CarbonCommonConstants.UNDERSCORE + tableName);
    List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(tableName);
    // Bounded by measureCount, not measures.size(): only the first measureCount entries are read.
    for (int i = 0; i < type.length; i++) {
        type[i] = measures.get(i).getDataType();
    }
    return type;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) StructDataType(org.apache.carbondata.processing.datatypes.StructDataType) ArrayDataType(org.apache.carbondata.processing.datatypes.ArrayDataType) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) DataType(org.apache.carbondata.core.metadata.datatype.DataType) PrimitiveDataType(org.apache.carbondata.processing.datatypes.PrimitiveDataType)

Example 14 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonDataProcessorUtil method checkAndCreateCarbonStoreLocation.

/**
 * Resolves the carbon data directory for the given table, partition and segment under the
 * supplied store location, hands that path to {@code CarbonUtil.checkAndCreateFolder}, and
 * returns it.
 *
 * @param factStoreLocation root store location under which the table path is resolved
 * @param databaseName      database name used to build the metadata lookup key
 * @param tableName         table name used to build the metadata lookup key
 * @param partitionId       partition id of the data directory
 * @param segmentId         segment id of the data directory
 * @return data directory path
 */
public static String checkAndCreateCarbonStoreLocation(String factStoreLocation, String databaseName, String tableName, String partitionId, String segmentId) {
    // Metadata entries are keyed by database name and table name joined with UNDERSCORE.
    final String metadataKey = databaseName + CarbonCommonConstants.UNDERSCORE + tableName;
    final CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(metadataKey);
    final CarbonTableIdentifier tableId = table.getCarbonTableIdentifier();
    final CarbonTablePath tablePath = CarbonStorePath.getCarbonTablePath(factStoreLocation, tableId);
    final String dataDirectory = tablePath.getCarbonDataDirectoryPath(partitionId, segmentId);
    CarbonUtil.checkAndCreateFolder(dataDirectory);
    return dataDirectory;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath)

Example 15 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonInputFormat method getSplits.

/**
 * {@inheritDoc}
 * Configurations FileInputFormat.INPUT_DIR
 * are used to get table path to read.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>If no segments to access are configured yet, load the valid and invalid segments,
 *       store the valid ones in the job configuration, and invalidate the invalid ones in the
 *       segment access client. An empty list is returned when there are no valid segments.</li>
 *   <li>Load the table and process the filter expression against it.</li>
 *   <li>For a filtered query on a table that has partition info, compute the matching
 *       partitions; an empty list is returned when none match.</li>
 *   <li>Resolve the filter, delegate to the overloaded {@code getSplits}, and attach the
 *       invalid segments and their timestamp ranges to every returned split.</li>
 * </ol>
 * The cache client created at the start is always closed before returning.
 *
 * @param job job context whose configuration identifies the table, segments and filter
 * @return {@code List<InputSplit>} list of CarbonInputSplit
 * @throws IOException if the table cannot be loaded (reported as a missing/corrupt schema file)
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
    try {
        List<String> invalidSegments = new ArrayList<>();
        List<UpdateVO> invalidTimestampsList = new ArrayList<>();
        // get all valid segments and set them into the configuration
        // (only when the job has not already been given an explicit list of segments)
        if (getSegmentsToAccess(job).length == 0) {
            SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
            SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
            SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
            setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
            // nothing to read: the configuration has been updated above, but there are no splits
            if (segments.getValidSegments().size() == 0) {
                return new ArrayList<>(0);
            }
            // remove entry in the segment index if there are invalid segments
            invalidSegments.addAll(segments.getInvalidSegments());
            // one invalid timestamp range is recorded per invalid segment, in the same order
            for (String invalidSegmentId : invalidSegments) {
                invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
            }
            if (invalidSegments.size() > 0) {
                List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
                for (String segId : invalidSegments) {
                    invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
                }
                cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
            }
        }
        // process and resolve the expression
        Expression filter = getFilterPredicates(job.getConfiguration());
        CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
        // this will be null in case of corrupt schema file.
        if (null == carbonTable) {
            throw new IOException("Missing/Corrupt schema file for table.");
        }
        CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
        // prune partitions for filter query on partition table
        // (matchedPartitions stays null when no pruning applies)
        BitSet matchedPartitions = null;
        if (null != filter) {
            PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
            if (null != partitionInfo) {
                Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
                matchedPartitions = new FilterExpressionProcessor().getFilteredPartitions(filter, partitionInfo, partitioner);
                if (matchedPartitions.cardinality() == 0) {
                    // no partition is required
                    return new ArrayList<InputSplit>();
                }
                if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
                    // all partitions are required, no need to prune partitions
                    matchedPartitions = null;
                }
            }
        }
        FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
        // do block filtering and get split
        List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
        // pass the invalid segment to task side in order to remove index entry in task side
        if (invalidSegments.size() > 0) {
            for (InputSplit split : splits) {
                ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
                ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
            }
        }
        return splits;
    } finally {
        // close the cache client to clear LRU cache memory; runs on every exit path,
        // including the early returns and the IOException above
        cacheClient.close();
    }
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Partitioner(org.apache.carbondata.core.scan.partition.Partitioner) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Aggregations

CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)37 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)13 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)9 CarbonMetadata (org.apache.carbondata.core.metadata.CarbonMetadata)8 ArrayList (java.util.ArrayList)7 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)6 IOException (java.io.IOException)5 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)5 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)5 TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo)5 File (java.io.File)4 DataType (org.apache.carbondata.core.metadata.datatype.DataType)4 TableSchema (org.apache.carbondata.core.metadata.schema.table.TableSchema)4 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)4 Expression (org.apache.carbondata.core.scan.expression.Expression)4 Test (org.junit.Test)4 FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)3 CarbonQueryPlan (org.apache.carbondata.core.scan.model.CarbonQueryPlan)3 QueryModel (org.apache.carbondata.core.scan.model.QueryModel)3 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)3