use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonDataMergerUtil method identifySegmentsToBeMergedBasedOnSegCount.
/**
 * Identify the segments to be merged based on the segment count (minor compaction).
 * Streaming segments, and segments whose data size reaches the minor compaction size
 * threshold, will not be compacted.
 *
 * @param compactionSize minor compaction size threshold in MB; when it is 0 or negative
 *                       no size based filtering is done
 * @param listOfSegmentsAfterPreserve the list of segments after
 *                                    preserve and before filtering by minor compaction level
 * @param tblProps table properties; a table level compaction level threshold, if present,
 *                 overrides the system level one
 * @param carbonLoadModel carbon load model, used to get the table path
 * @return the list of segments to be merged after filtering by minor compaction level,
 *         or an empty list if no level threshold is reached
 * @throws IOException propagated from the segment size calculation
 */
private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSegCount(
    long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve,
    Map<String, String> tblProps, CarbonLoadModel carbonLoadModel) throws IOException {
  List<LoadMetadataDetails> mergedSegments =
      new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  List<LoadMetadataDetails> unMergedSegments =
      new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  int[] noOfSegmentLevelsCount = CarbonProperties.getInstance().getCompactionSegmentLevelCount();
  // overwrite system level option by table level option if exists
  if (tblProps.containsKey(CarbonCommonConstants.TABLE_COMPACTION_LEVEL_THRESHOLD)) {
    noOfSegmentLevelsCount = CarbonProperties.getInstance()
        .getIntArray(tblProps.get(CarbonCommonConstants.TABLE_COMPACTION_LEVEL_THRESHOLD));
    // fall back to the system level option if the table level one yields no levels
    if (0 == noOfSegmentLevelsCount.length) {
      noOfSegmentLevelsCount = CarbonProperties.getInstance().getCompactionSegmentLevelCount();
    }
  }
  int level1Size = 0;
  int level2Size = 0;
  int size = noOfSegmentLevelsCount.length;
  if (size >= 2) {
    level1Size = noOfSegmentLevelsCount[0];
    level2Size = noOfSegmentLevelsCount[1];
    /*
    Ex. if segs => 0.1,2,3 and threshold =2,1
    during 2nd time compaction, mergeCounter becomes 1 and we check if mergeCounter==level2Size
    then return mergedSegments which will return 0.1 and since only 1 segment(0.1) is
    identified, no segment would go for compaction. So change 2nd level threshold to 0 if
    it is 1.
    */
    level2Size = level2Size == 1 ? 0 : level2Size;
  } else if (size == 1) {
    level1Size = noOfSegmentLevelsCount[0];
  }
  int unMergeCounter = 0;
  int mergeCounter = 0;
  CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
  for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) {
    SegmentStatus segmentStatus = segment.getSegmentStatus();
    // compaction should skip streaming segments; this is checked first so that no segment
    // size is calculated for a segment that is skipped anyway
    if (segmentStatus == SegmentStatus.STREAMING
        || segmentStatus == SegmentStatus.STREAMING_FINISH) {
      continue;
    }
    // the segment size only matters when a minor compaction size threshold is configured
    if (compactionSize > 0) {
      // check size of the segment, summed up across partitions
      long sizeOfOneSegmentAcrossPartition;
      if (segment.getSegmentFile() != null) {
        // use the data size recorded in the load metadata if it is there,
        // if not there then calculate the size from the segment file
        if (!StringUtils.isEmpty(segment.getDataSize())) {
          sizeOfOneSegmentAcrossPartition = Long.parseLong(segment.getDataSize());
        } else {
          sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment(
              carbonTable.getTablePath(),
              new Segment(segment.getLoadName(), segment.getSegmentFile()));
        }
      } else {
        sizeOfOneSegmentAcrossPartition =
            getSizeOfSegment(carbonTable.getTablePath(), segment.getLoadName());
      }
      // skip the segment if its size (converted to MB) reaches the threshold
      if (sizeOfOneSegmentAcrossPartition / (1024 * 1024) >= compactionSize) {
        continue;
      }
    }
    String segName = segment.getLoadName();
    // if a segment is already merged 2 or more levels (possible from custom compaction),
    // need to exclude those segments from minor compaction.
    // if a segment is major compacted then should not be considered for minor.
    boolean isMoreThanOrEqualsToLevel2 = segName.contains(".")
        && Integer.parseInt(segName.substring(segName.lastIndexOf(".") + 1)) >= 2;
    if (isMoreThanOrEqualsToLevel2 || "true".equalsIgnoreCase(segment.isMajorCompacted())) {
      continue;
    }
    // only successfully (or partially successfully) loaded segments are counted
    if (segmentStatus == SegmentStatus.SUCCESS
        || segmentStatus == SegmentStatus.LOAD_PARTIAL_SUCCESS) {
      if (!isMergedSegment(segName)) {
        // if it is an unmerged segment then increment counter
        unMergeCounter++;
        unMergedSegments.add(segment);
        if (unMergeCounter == level1Size) {
          return unMergedSegments;
        }
      } else {
        // already merged segment, counted against the 2nd level threshold
        mergeCounter++;
        mergedSegments.add(segment);
        if (mergeCounter == level2Size) {
          return mergedSegments;
        }
      }
    }
  }
  // no level threshold was reached, nothing to merge
  return new ArrayList<>(0);
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonDataMergerUtil method identifySegmentsToBeMergedBasedOnSize.
/**
 * Select the segments to be merged for Major compaction, driven by accumulated segment size.
 * <p>
 * Segments are scanned in list order and streaming segments are skipped. A segment is
 * collected while the running total of sizes stays below the compaction size. When a
 * segment is larger than the compaction size on its own, or the running total reaches the
 * compaction size, the scan stops if more than one segment has been collected; otherwise
 * the collection is started over.
 *
 * @param compactionSize compaction size in MB format
 * @param listOfSegmentsAfterPreserve the segments list after
 *                                    preserving the configured number of latest loads
 * @param carbonLoadModel carbon load model
 * @return the list of segments that need to be merged
 *         based on the Size in case of Major compaction
 * @throws IOException propagated from the segment size calculation
 */
private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSize(
    long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve,
    CarbonLoadModel carbonLoadModel) throws IOException {
  // compaction size converted from MB, to be compared with the segment sizes
  final long sizeLimit = compactionSize * 1024 * 1024;
  CarbonTable table = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
  List<LoadMetadataDetails> candidates =
      new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  // running total of the sizes of the collected segments
  long accumulatedSize = 0;
  for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) {
    // compaction should skip streaming segments
    SegmentStatus status = segment.getSegmentStatus();
    if (status == SegmentStatus.STREAMING || status == SegmentStatus.STREAMING_FINISH) {
      continue;
    }
    String segId = segment.getLoadName();
    // size of this one segment across partitions
    long segmentSize;
    if (segment.getSegmentFile() == null) {
      segmentSize = getSizeOfSegment(table.getTablePath(), segId);
    } else if (StringUtils.isEmpty(segment.getDataSize())) {
      // no data size recorded in the load metadata, calculate it from the segment file
      segmentSize = CarbonUtil.getSizeOfSegment(table.getTablePath(),
          new Segment(segId, segment.getSegmentFile()));
    } else {
      segmentSize = Long.parseLong(segment.getDataSize());
    }
    // a segment bigger than the compaction size on its own is never collected
    boolean tooBigOnItsOwn = segmentSize > sizeLimit;
    if (!tooBigOnItsOwn) {
      accumulatedSize += segmentSize;
      if (accumulatedSize < sizeLimit) {
        candidates.add(segment);
        continue;
      }
    }
    // reaching here means this segment cannot simply be appended:
    // if already 2 segments have been found for merging then stop scan here and merge.
    if (candidates.size() > 1) {
      break;
    }
    // at most one segment was collected so far, drop it and start over
    candidates = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    if (tooBigOnItsOwn) {
      // restart with nothing collected
      accumulatedSize = 0;
    } else {
      // restart from the current segment
      candidates.add(segment);
      accumulatedSize = segmentSize;
    }
  }
  return candidates;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonMetadataTest method testGetCarbonDimensionBasedOnColIdentifier.
/**
 * Looks up a dimension by column identifier "1" while the table's visible dimensions are
 * mocked to two dimensions with unique ids "1" and "2", and expects the first one back.
 */
@Test
public void testGetCarbonDimensionBasedOnColIdentifier() {
  CarbonTable carbonTable = CarbonTable.buildFromTableInfo(getTableInfo(1000L));
  String columnIdentifier = "1";
  // parameterized list instead of a raw ArrayList, so the assignment is type checked
  final List<CarbonDimension> carbonDimensions = new ArrayList<>();
  ColumnSchema colSchema1 = new ColumnSchema();
  ColumnSchema colSchema2 = new ColumnSchema();
  colSchema1.setColumnUniqueId("1");
  colSchema2.setColumnUniqueId("2");
  carbonDimensions.add(new CarbonDimension(colSchema1, 1, 1, 1));
  carbonDimensions.add(new CarbonDimension(colSchema2, 2, 2, 2));
  // make the table report the dimensions prepared above
  new MockUp<CarbonTable>() {
    @Mock
    public String getTableName() {
      return "carbonTestTable";
    }

    @Mock
    public List<CarbonDimension> getVisibleDimensions() {
      return carbonDimensions;
    }
  };
  CarbonDimension expectedResult = carbonDimensions.get(0);
  CarbonDimension actualResult =
      carbonMetadata.getCarbonDimensionBasedOnColIdentifier(carbonTable, columnIdentifier);
  assertEquals(expectedResult, actualResult);
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonMetadataTest method testGetCarbonDimensionBasedOnColIdentifierWhenChildDimensionColumnEqualsColumnIdentifier.
/**
 * Looks up a dimension by column identifier "9" while the only visible dimension has unique
 * id "1" and every dimension is mocked to report one child with unique id "9"; expects that
 * child dimension back.
 */
@Test
public void testGetCarbonDimensionBasedOnColIdentifierWhenChildDimensionColumnEqualsColumnIdentifier() {
  CarbonTable carbonTable = CarbonTable.buildFromTableInfo(getTableInfo(1000L));
  String columnIdentifier = "9";
  // parameterized lists instead of raw ArrayList, so the assignments are type checked
  final List<CarbonDimension> carbonDimensions = new ArrayList<>();
  ColumnSchema colSchema1 = new ColumnSchema();
  colSchema1.setColumnUniqueId("1");
  carbonDimensions.add(new CarbonDimension(colSchema1, 1, 1, 1));
  final List<CarbonDimension> carbonChildDimensions = new ArrayList<>();
  ColumnSchema colSchema3 = new ColumnSchema();
  colSchema3.setColumnUniqueId("9");
  carbonChildDimensions.add(new CarbonDimension(colSchema3, 1, 1, 1));
  // make the table report the parent dimension prepared above
  new MockUp<CarbonTable>() {
    @Mock
    public String getTableName() {
      return "carbonTestTable";
    }

    @Mock
    public List<CarbonDimension> getVisibleDimensions() {
      return carbonDimensions;
    }
  };
  // make every dimension report the single child prepared above
  new MockUp<CarbonDimension>() {
    @Mock
    public int getNumberOfChild() {
      return 1;
    }

    @Mock
    public List<CarbonDimension> getListOfChildDimensions() {
      return carbonChildDimensions;
    }
  };
  CarbonDimension expectedResult = carbonChildDimensions.get(0);
  CarbonDimension actualResult =
      carbonMetadata.getCarbonDimensionBasedOnColIdentifier(carbonTable, columnIdentifier);
  assertEquals(expectedResult, actualResult);
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonTableReader method updateSchemaTables.
/**
 * Refresh the schema time of the cached entry for the given table and clear the indexes of
 * the table when the cached entry is no longer valid.
 * <p>
 * Nothing is done when there is no cached entry for the table, or when the cached table is
 * not a transactional table. Otherwise the last modified time of the table's schema file is
 * set on the cached entry as its current schema time. How that affects the validity of the
 * entry is decided by the cache model and is not visible here.
 *
 * @param schemaTableName schema and table name used as the cache key
 * @param config configuration used to access the schema file
 */
private void updateSchemaTables(SchemaTableName schemaTableName, Configuration config) {
  CarbonTableCacheModel cachedModel = carbonCache.get().get(schemaTableName);
  if (cachedModel == null) {
    return;
  }
  CarbonTable cachedTable = cachedModel.getCarbonTable();
  if (!cachedTable.isTransactionalTable()) {
    return;
  }
  String schemaFilePath = CarbonTablePath.getSchemaFilePath(cachedTable.getTablePath());
  long schemaModifiedTime = FileFactory.getCarbonFile(schemaFilePath, config).getLastModifiedTime();
  cachedModel.setCurrentSchemaTime(schemaModifiedTime);
  if (cachedModel.isValid()) {
    return;
  }
  // Invalidate indexes
  IndexStoreManager.getInstance().clearIndex(cachedTable.getAbsoluteTableIdentifier());
}
Aggregations