Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class CarbonFactDataHandlerModel, method getCarbonDataFolderLocation.
/**
 * This method will get the carbon data directory path for the segment id and
 * partition id carried in the given load configuration
 *
 * @param configuration the data load configuration
 * @return data directory path
 */
private static String getCarbonDataFolderLocation(CarbonDataLoadConfiguration configuration) {
  String carbonStorePath =
      CarbonProperties.getInstance().getProperty(CarbonCommonConstants.STORE_LOCATION_HDFS);
  CarbonTableIdentifier tableIdentifier =
      configuration.getTableIdentifier().getCarbonTableIdentifier();
  CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(
      tableIdentifier.getDatabaseName() + CarbonCommonConstants.UNDERSCORE
          + tableIdentifier.getTableName());
  CarbonTablePath carbonTablePath =
      CarbonStorePath.getCarbonTablePath(carbonStorePath, carbonTable.getCarbonTableIdentifier());
  return carbonTablePath.getCarbonDataDirectoryPath(configuration.getPartitionId(),
      configuration.getSegmentId() + "");
}
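A minimal sketch of the lookup pattern this snippet relies on: CarbonMetadata caches tables process-wide under a "databaseName_tableName" key, and the resolved table's identifier then drives path construction. The database name, table name, and store path below are placeholders, and the null check is a defensive assumption rather than part of the original method.

// Hypothetical lookup sketch; "default", "sales" and "/tmp/carbon-store" are placeholders.
String tableUniqueName = "default" + CarbonCommonConstants.UNDERSCORE + "sales";
CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(tableUniqueName);
if (table == null) {
  // the table was never registered in this JVM's CarbonMetadata cache
  throw new IllegalStateException("Table not loaded: " + tableUniqueName);
}
CarbonTablePath tablePath =
    CarbonStorePath.getCarbonTablePath("/tmp/carbon-store", table.getCarbonTableIdentifier());
String dataDir = tablePath.getCarbonDataDirectoryPath("0", "0"); // partition id, segment id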
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class CarbonFactDataHandlerColumnar, method initParameters.
private void initParameters(CarbonFactDataHandlerModel model) {
  this.colGrpModel = model.getSegmentProperties().getColumnGroupModel();
  // TODO need to pass carbon table identifier to metadata
  CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(
      model.getDatabaseName() + CarbonCommonConstants.UNDERSCORE + model.getTableName());
  isDictDimension =
      CarbonUtil.identifyDimensionType(carbonTable.getDimensionByTableName(model.getTableName()));
  // need to convert it to byte array.
  if (model.isCompactionFlow()) {
    try {
      numberOfCores = Integer.parseInt(CarbonProperties.getInstance()
          .getProperty(CarbonCommonConstants.NUM_CORES_COMPACTING,
              CarbonCommonConstants.NUM_CORES_DEFAULT_VAL));
    } catch (NumberFormatException exc) {
      LOGGER.error("Configured value for property " + CarbonCommonConstants.NUM_CORES_COMPACTING
          + " is wrong. Falling back to the default value "
          + CarbonCommonConstants.NUM_CORES_DEFAULT_VAL);
      numberOfCores = Integer.parseInt(CarbonCommonConstants.NUM_CORES_DEFAULT_VAL);
    }
  } else {
    try {
      numberOfCores = Integer.parseInt(CarbonProperties.getInstance()
          .getProperty(CarbonCommonConstants.NUM_CORES_LOADING,
              CarbonCommonConstants.NUM_CORES_DEFAULT_VAL));
    } catch (NumberFormatException exc) {
      LOGGER.error("Configured value for property " + CarbonCommonConstants.NUM_CORES_LOADING
          + " is wrong. Falling back to the default value "
          + CarbonCommonConstants.NUM_CORES_DEFAULT_VAL);
      numberOfCores = Integer.parseInt(CarbonCommonConstants.NUM_CORES_DEFAULT_VAL);
    }
  }
  blockletProcessingCount = new AtomicInteger(0);
  producerExecutorService = Executors.newFixedThreadPool(numberOfCores);
  producerExecutorServiceTaskList = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  LOGGER.info("Initializing writer executors");
  consumerExecutorService = Executors.newFixedThreadPool(1);
  consumerExecutorServiceTaskList = new ArrayList<>(1);
  semaphore = new Semaphore(numberOfCores);
  blockletDataHolder = new BlockletDataHolder();
  // start the consumer which will take each blocklet/page in order and write to a file
  Consumer consumer = new Consumer(blockletDataHolder);
  consumerExecutorServiceTaskList.add(consumerExecutorService.submit(consumer));
}
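The second half of initParameters wires up a producer/consumer pipeline: numberOfCores producer threads are throttled by a semaphore, while a single-threaded consumer drains blocklets in order and writes them out. Below is a self-contained sketch of that shape; all names are illustrative, and a plain BlockingQueue stands in for BlockletDataHolder.

import java.util.concurrent.*;

// Minimal producer/consumer sketch mirroring the structure above:
// N producers gated by a semaphore, one consumer thread draining in order.
public class ProducerConsumerSketch {
  public static void main(String[] args) throws Exception {
    int numberOfCores = 4;
    BlockingQueue<String> pages = new LinkedBlockingQueue<>(); // stands in for BlockletDataHolder
    Semaphore semaphore = new Semaphore(numberOfCores);        // bounds in-flight producer work
    ExecutorService producers = Executors.newFixedThreadPool(numberOfCores);
    ExecutorService consumer = Executors.newFixedThreadPool(1); // single writer preserves order

    consumer.submit(() -> {
      while (true) {
        String page = pages.take();       // blocks until a page is available
        if (page.isEmpty()) break;        // empty string used as a poison pill here
        System.out.println("writing " + page);
        semaphore.release();              // free a slot for the next producer task
      }
      return null;
    });

    for (int i = 0; i < 8; i++) {
      final int id = i;
      semaphore.acquire();                // wait if too many pages are in flight
      producers.submit(() -> pages.add("page-" + id));
    }
    producers.shutdown();
    producers.awaitTermination(1, TimeUnit.MINUTES);
    pages.add("");                        // signal the consumer to stop
    consumer.shutdown();
  }
}

Keeping the consumer pool at size one is what preserves write order; the semaphore only bounds how far the producers can run ahead of the writer.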
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class CarbonDataProcessorUtil, method getMeasureDataType.
public static DataType[] getMeasureDataType(int measureCount, String databaseName,
    String tableName) {
  DataType[] type = new DataType[measureCount];
  for (int i = 0; i < type.length; i++) {
    type[i] = DataType.DOUBLE;
  }
  CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(
      databaseName + CarbonCommonConstants.UNDERSCORE + tableName);
  List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(tableName);
  for (int i = 0; i < type.length; i++) {
    type[i] = measures.get(i).getDataType();
  }
  return type;
}
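A hypothetical call site for the utility above ("default" and "sales" are placeholder names). The returned array has one entry per measure, in the order the measures appear in the table schema; the DOUBLE pre-fill in the first loop only matters as a fallback, since the second loop overwrites every slot from the schema.

// Hypothetical usage of getMeasureDataType; names and the measure count are placeholders.
DataType[] measureTypes = CarbonDataProcessorUtil.getMeasureDataType(3, "default", "sales");
for (DataType measureType : measureTypes) {
  System.out.println(measureType); // one entry per measure, in table-schema order
}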
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class CarbonDataProcessorUtil, method checkAndCreateCarbonStoreLocation.
/**
 * This method will get the store location for the given path, segment id and
 * partition id, and create the data directory if it does not already exist
 *
 * @return data directory path
 */
public static String checkAndCreateCarbonStoreLocation(String factStoreLocation,
    String databaseName, String tableName, String partitionId, String segmentId) {
  CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(
      databaseName + CarbonCommonConstants.UNDERSCORE + tableName);
  CarbonTableIdentifier carbonTableIdentifier = carbonTable.getCarbonTableIdentifier();
  CarbonTablePath carbonTablePath =
      CarbonStorePath.getCarbonTablePath(factStoreLocation, carbonTableIdentifier);
  String carbonDataDirectoryPath =
      carbonTablePath.getCarbonDataDirectoryPath(partitionId, segmentId);
  CarbonUtil.checkAndCreateFolder(carbonDataDirectoryPath);
  return carbonDataDirectoryPath;
}
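The only difference from getCarbonDataFolderLocation above is the final check-and-create step. A plain-java sketch of the same idempotent directory creation is below; the path is a placeholder, and CarbonUtil.checkAndCreateFolder does the analogous work through CarbonData's own file abstraction rather than java.io.

import java.io.File;

// Idempotent "check and create": creating an already-existing directory is a no-op,
// and a failure to create a missing one is surfaced as an error.
// The path is a made-up example, not taken from the snippet above.
File dataDir = new File("/tmp/carbon-store/default/sales/Fact/Part0/Segment_0");
if (!dataDir.exists() && !dataDir.mkdirs()) {
  throw new RuntimeException("Failed to create data directory: " + dataDir);
}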
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
The class CarbonInputFormat, method getSplits.
/**
 * {@inheritDoc}
 * The configuration FileInputFormat.INPUT_DIR is used to get
 * the table path to read.
 *
 * @param job the job context
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
  CacheClient cacheClient = new CacheClient(identifier.getStorePath());
  try {
    List<String> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    // get all valid segments and set them into the configuration
    if (getSegmentsToAccess(job).length == 0) {
      SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
      SegmentStatusManager.ValidAndInvalidSegmentsInfo segments =
          segmentStatusManager.getValidAndInvalidSegments();
      SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
      setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
      if (segments.getValidSegments().size() == 0) {
        return new ArrayList<>(0);
      }
      // remove entry in the segment index if there are invalid segments
      invalidSegments.addAll(segments.getInvalidSegments());
      for (String invalidSegmentId : invalidSegments) {
        invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
      }
      if (invalidSegments.size() > 0) {
        List<TableSegmentUniqueIdentifier> invalidSegmentsIds =
            new ArrayList<>(invalidSegments.size());
        for (String segId : invalidSegments) {
          invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
        }
        cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
      }
    }
    // process and resolve the expression
    Expression filter = getFilterPredicates(job.getConfiguration());
    CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
    // this will be null in case of corrupt schema file.
    if (null == carbonTable) {
      throw new IOException("Missing/Corrupt schema file for table.");
    }
    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    // prune partitions for filter query on partition table
    BitSet matchedPartitions = null;
    if (null != filter) {
      PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
      if (null != partitionInfo) {
        Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
        matchedPartitions = new FilterExpressionProcessor()
            .getFilteredPartitions(filter, partitionInfo, partitioner);
        if (matchedPartitions.cardinality() == 0) {
          // no partition is required
          return new ArrayList<InputSplit>();
        }
        if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
          // all partitions are required, no need to prune partitions
          matchedPartitions = null;
        }
      }
    }
    FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    // do block filtering and get split
    List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
    // pass the invalid segments to the task side in order to remove the index entries there
    if (invalidSegments.size() > 0) {
      for (InputSplit split : splits) {
        ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
        ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
      }
    }
    return splits;
  } finally {
    // close the cache client to clear LRU cache memory
    cacheClient.close();
  }
}
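The partition-pruning block above reduces to a three-way decision on the matched-partition BitSet: nothing matched (return no splits), everything matched (drop the bitmap and skip pruning), or a strict subset matched (carry the bitmap into block filtering). A self-contained sketch of that decision; the partition count and matched indices are made-up inputs, not values taken from CarbonInputFormat.

import java.util.BitSet;

// Illustrative three-way pruning decision over a matched-partition bitmap.
public class PartitionPruningSketch {
  public static void main(String[] args) {
    int numPartitions = 8;
    BitSet matchedPartitions = new BitSet(numPartitions);
    matchedPartitions.set(2);
    matchedPartitions.set(5);

    if (matchedPartitions.cardinality() == 0) {
      System.out.println("no partition matches the filter: return an empty split list");
    } else if (matchedPartitions.cardinality() == numPartitions) {
      System.out.println("every partition matches: drop the bitmap and skip pruning");
    } else {
      System.out.println("prune to partitions " + matchedPartitions); // prints {2, 5}
    }
  }
}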