use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonDataMergerUtil method identifySegmentsToBeMergedBasedOnSize.
/**
* Identify the segments to be merged based on the Size in case of Major compaction.
*
* @param compactionSize compaction size in MB
* @param listOfSegmentsAfterPreserve the segments list after
* preserving the configured number of latest loads
* @param carbonLoadModel carbon load model
* @param tablePath the table's store location
* @return the list of segments that need to be merged
* based on the Size in case of Major compaction
*/
private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSize(
    long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve,
    CarbonLoadModel carbonLoadModel, String tablePath) throws IOException {
  List<LoadMetadataDetails> segmentsToBeMerged =
      new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
  // total length
  long totalLength = 0;
  // check the size of each segment, summed up across partitions
  for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) {
    // compaction should skip streaming segments
    if (segment.getSegmentStatus() == SegmentStatus.STREAMING
        || segment.getSegmentStatus() == SegmentStatus.STREAMING_FINISH) {
      continue;
    }
    String segId = segment.getLoadName();
    // variable to store one segment's size across partitions
    long sizeOfOneSegmentAcrossPartition;
    if (segment.getSegmentFile() != null) {
      sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment(
          carbonTable.getTablePath(), new Segment(segId, segment.getSegmentFile()));
    } else {
      sizeOfOneSegmentAcrossPartition = getSizeOfSegment(carbonTable.getTablePath(), segId);
    }
    // if the size of a segment is greater than the major compaction size, ignore it
    if (sizeOfOneSegmentAcrossPartition > (compactionSize * 1024 * 1024)) {
      // if 2 segments have already been found for merging then stop the scan here and merge
      if (segmentsToBeMerged.size() > 1) {
        break;
      } else {
        // if only one segment is found then remove the earlier one from the list
        // and reset the total length to 0
        segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        totalLength = 0;
        continue;
      }
    }
    totalLength += sizeOfOneSegmentAcrossPartition;
    // add the segment as long as the accumulated size stays below the major compaction size
    if (totalLength < (compactionSize * 1024 * 1024)) {
      segmentsToBeMerged.add(segment);
    } else {
      // if 2 segments have already been found for merging then stop the scan here and merge
      if (segmentsToBeMerged.size() > 1) {
        break;
      } else {
        // if only one segment is found then remove the earlier one from the list and add this one;
        // reset the total length to the size of the current segment
        segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        segmentsToBeMerged.add(segment);
        totalLength = sizeOfOneSegmentAcrossPartition;
      }
    }
  }
  return segmentsToBeMerged;
}
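The method above is private to CarbonDataMergerUtil and cannot be called directly. Below is a minimal, self-contained sketch of the same greedy accumulation over plain segment sizes in bytes; the class and method names are illustrative and not part of CarbonData, and streaming-segment handling is omitted for brevity.
import java.util.ArrayList;
import java.util.List;

public class MajorCompactionSelectionSketch {

  // Mirrors the selection logic: skip segments that alone exceed the threshold,
  // accumulate consecutive segments until the threshold is hit, and stop once at
  // least two candidates have been collected.
  static List<Integer> selectSegments(long[] segmentSizesInBytes, long compactionSizeInMb) {
    long threshold = compactionSizeInMb * 1024 * 1024;
    List<Integer> selected = new ArrayList<>();
    long totalLength = 0;
    for (int i = 0; i < segmentSizesInBytes.length; i++) {
      long size = segmentSizesInBytes[i];
      if (size > threshold) {
        // an oversized segment either ends the scan (enough candidates found)
        // or resets the running selection
        if (selected.size() > 1) {
          break;
        }
        selected.clear();
        totalLength = 0;
        continue;
      }
      totalLength += size;
      if (totalLength < threshold) {
        selected.add(i);
      } else if (selected.size() > 1) {
        break;
      } else {
        // restart the window from the current segment
        selected.clear();
        selected.add(i);
        totalLength = size;
      }
    }
    return selected;
  }

  public static void main(String[] args) {
    // three 40 MB segments with a 100 MB compaction size: the first two fit and are selected
    long mb = 1024 * 1024;
    System.out.println(selectSegments(new long[] {40 * mb, 40 * mb, 40 * mb}, 100));
  }
}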
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonTableInputFormat method getSplitsOfOneSegment.
/**
* Read data in one segment. Used for the alter table partition statement.
* @param job the job context
* @param targetSegment the segment to read
* @param oldPartitionIdList the old partition ids before partitionInfo was changed
* @param partitionInfo the current partition info of the table
* @return the splits of the target segment
*/
public List<InputSplit> getSplitsOfOneSegment(JobContext job, String targetSegment,
    List<Integer> oldPartitionIdList, PartitionInfo partitionInfo) {
  List<Segment> invalidSegments = new ArrayList<>();
  List<UpdateVO> invalidTimestampsList = new ArrayList<>();
  List<Segment> segmentList = new ArrayList<>();
  segmentList.add(new Segment(targetSegment, null));
  setSegmentsToAccess(job.getConfiguration(), segmentList);
  try {
    // process and resolve the expression
    Expression filter = getFilterPredicates(job.getConfiguration());
    CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    // this will be null in case of a corrupt schema file
    if (null == carbonTable) {
      throw new IOException("Missing/Corrupt schema file for table.");
    }
    carbonTable.processFilterExpression(filter, null, null);
    TableProvider tableProvider = new SingleTableProvider(carbonTable);
    // prune partitions for a filter query on a partitioned table
    String partitionIds = job.getConfiguration().get(ALTER_PARTITION_ID);
    // matchedPartitions records the partition index, not the partition id
    BitSet matchedPartitions = null;
    if (partitionInfo != null) {
      matchedPartitions =
          setMatchedPartitions(partitionIds, filter, partitionInfo, oldPartitionIdList);
      if (matchedPartitions != null) {
        if (matchedPartitions.cardinality() == 0) {
          return new ArrayList<InputSplit>();
        } else if (matchedPartitions.cardinality() == partitionInfo.getNumPartitions()) {
          matchedPartitions = null;
        }
      }
    }
    FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter, tableProvider);
    // do block filtering and get the splits
    List<InputSplit> splits = getSplits(job, filterInterface, segmentList, matchedPartitions,
        partitionInfo, oldPartitionIdList, new SegmentUpdateStatusManager(carbonTable));
    // pass the invalid segments to the task side so their index entries can be removed there
    if (invalidSegments.size() > 0) {
      for (InputSplit split : splits) {
        ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
        ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
      }
    }
    return splits;
  } catch (IOException e) {
    throw new RuntimeException("Can't get splits of the target segment ", e);
  }
}
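A hedged usage sketch (not from the CarbonData sources, imports omitted as in the excerpts above): how a caller handling an alter-partition flow might request the splits of one segment. The carbonTable, oldPartitionIds and partitionInfo variables, and the generic type on the input format, are assumed to be supplied by the surrounding code; the configuration keys and setTableInfo call are the ones shown in the excerpts.
// sketch only: configure the input format for the table, then ask for one segment's splits
Configuration conf = new Configuration();
conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTable.getTablePath());
conf.set(CarbonTableInputFormat.DATABASE_NAME, carbonTable.getDatabaseName());
conf.set(CarbonTableInputFormat.TABLE_NAME, carbonTable.getTableName());
CarbonTableInputFormat.setTableInfo(conf, carbonTable.getTableInfo());
Job job = Job.getInstance(conf);
CarbonTableInputFormat<Object[]> format = new CarbonTableInputFormat<>();
// read only segment "0"; partitionInfo and oldPartitionIds describe the table's partitions
List<InputSplit> splits = format.getSplitsOfOneSegment(job, "0", oldPartitionIds, partitionInfo);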
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class StoreCreator method createTableAndLoadModel.
public static CarbonLoadModel createTableAndLoadModel() throws Exception {
  String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
  File storeDir = new File(storePath);
  CarbonUtil.deleteFoldersAndFiles(storeDir);
  CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, storePath);
  CarbonTable table = createTable(absoluteTableIdentifier);
  writeDictionary(factFilePath, table);
  return buildCarbonLoadModel(table, factFilePath, absoluteTableIdentifier);
}
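A hedged sketch of how a test might use this helper; it only inspects the returned model, since the follow-up load step depends on the StoreCreator version in use, and the getters are standard CarbonLoadModel accessors assumed to be available here.
// sketch only: build the load model for the sample CSV and inspect it
CarbonLoadModel loadModel = StoreCreator.createTableAndLoadModel();
System.out.println("fact file: " + loadModel.getFactFilePath());
System.out.println("target table: " + loadModel.getDatabaseName() + "." + loadModel.getTableName());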
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbondataMetadata method getTableMetadata.
private ConnectorTableMetadata getTableMetadata(SchemaTableName schemaTableName) {
  if (!listSchemaNamesInternal().contains(schemaTableName.getSchemaName())) {
    throw new SchemaNotFoundException(schemaTableName.getSchemaName());
  }
  CarbonTable carbonTable = carbonTableReader.getTable(schemaTableName);
  List<ColumnMetadata> columnsMetaList = new LinkedList<>();
  List<CarbonColumn> carbonColumns =
      carbonTable.getCreateOrderColumn(schemaTableName.getTableName());
  for (CarbonColumn col : carbonColumns) {
    // the show columns command will return this data
    Type columnType = carbonDataType2SpiMapper(col.getColumnSchema());
    ColumnMetadata columnMeta =
        new ColumnMetadata(col.getColumnSchema().getColumnName(), columnType);
    columnsMetaList.add(columnMeta);
  }
  // the carbondata connector's table metadata
  return new ConnectorTableMetadata(schemaTableName, columnsMetaList);
}
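For reference, a hedged sketch of iterating the same creation-ordered column list outside the connector; a loaded carbonTable instance is assumed, and the output format is purely illustrative.
// sketch only: list each column's name and data type in creation order
for (CarbonColumn col : carbonTable.getCreateOrderColumn(carbonTable.getTableName())) {
  System.out.println(col.getColumnSchema().getColumnName() + " : "
      + col.getColumnSchema().getDataType());
}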
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonTableReader method getInputSplits2.
public List<CarbonLocalInputSplit> getInputSplits2(CarbonTableCacheModel tableCacheModel,
    Expression filters) {
  List<CarbonLocalInputSplit> result = new ArrayList<>();
  if (config.getUnsafeMemoryInMb() != null) {
    CarbonProperties.getInstance().addProperty(
        CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB, config.getUnsafeMemoryInMb());
  }
  CarbonTable carbonTable = tableCacheModel.carbonTable;
  TableInfo tableInfo = tableCacheModel.carbonTable.getTableInfo();
  // a fresh Hadoop Configuration; this local shadows the connector config field used above
  Configuration config = new Configuration();
  config.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
  String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
  config.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
  config.set(CarbonTableInputFormat.DATABASE_NAME, carbonTable.getDatabaseName());
  config.set(CarbonTableInputFormat.TABLE_NAME, carbonTable.getTableName());
  try {
    CarbonTableInputFormat.setTableInfo(config, tableInfo);
    CarbonTableInputFormat carbonTableInputFormat =
        createInputFormat(config, carbonTable.getAbsoluteTableIdentifier(), filters);
    JobConf jobConf = new JobConf(config);
    Job job = Job.getInstance(jobConf);
    List<InputSplit> splits = carbonTableInputFormat.getSplits(job);
    CarbonInputSplit carbonInputSplit = null;
    Gson gson = new Gson();
    if (splits != null && splits.size() > 0) {
      for (InputSplit inputSplit : splits) {
        carbonInputSplit = (CarbonInputSplit) inputSplit;
        result.add(new CarbonLocalInputSplit(carbonInputSplit.getSegmentId(),
            carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(),
            carbonInputSplit.getLength(), Arrays.asList(carbonInputSplit.getLocations()),
            carbonInputSplit.getNumberOfBlocklets(), carbonInputSplit.getVersion().number(),
            carbonInputSplit.getDeleteDeltaFiles(), gson.toJson(carbonInputSplit.getDetailInfo())));
      }
    }
  } catch (IOException e) {
    throw new RuntimeException("Error creating Splits from CarbonTableInputFormat", e);
  }
  return result;
}
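A hedged usage sketch: listing the local splits the connector would hand to the query engine. The tableCacheModel and filterExpression variables are assumed to be prepared by the caller (the filter may be null for a full scan), and the CarbonLocalInputSplit getter names are assumed to match its constructor arguments.
// sketch only: enumerate the splits for a cached table
List<CarbonLocalInputSplit> splits = carbonTableReader.getInputSplits2(tableCacheModel, filterExpression);
for (CarbonLocalInputSplit split : splits) {
  System.out.println("segment " + split.getSegmentId()
      + ", path " + split.getPath() + ", length " + split.getLength());
}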