Search in sources :

Example 81 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonDataMergerUtil method identifySegmentsToBeMergedBasedOnSegCount.

/**
 * Identify the segments to be merged for minor compaction based on the segment count.
 * Streaming segments, segments whose data size reaches the minor compaction size threshold,
 * segments already merged two or more levels and major compacted segments are not compacted.
 *
 * @param compactionSize minor compaction size threshold, compared against the segment size
 *        divided by 1024 * 1024 (presumably MB); a value of 0 or less disables the size filter
 * @param listOfSegmentsAfterPreserve the list of segments after
 *        preserve and before filtering by minor compaction level
 * @param tblProps table properties; a table level compaction level threshold, when present
 *        and non-empty, overrides the system level one. May be null.
 * @param carbonLoadModel carbon load model, used to resolve the table path
 * @return the list of segments to be merged after filtering by minor compaction level,
 *         or an empty list when no level threshold is reached
 * @throws IOException declared by this method; presumably raised while a segment size is
 *         calculated
 */
private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSegCount(long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve, Map<String, String> tblProps, CarbonLoadModel carbonLoadModel) throws IOException {
    List<LoadMetadataDetails> mergedSegments = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    List<LoadMetadataDetails> unMergedSegments = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    int[] noOfSegmentLevelsCount = CarbonProperties.getInstance().getCompactionSegmentLevelCount();
    // overwrite system level option by table level option if exists
    if (tblProps != null && tblProps.containsKey(CarbonCommonConstants.TABLE_COMPACTION_LEVEL_THRESHOLD)) {
        int[] tableLevelsCount = CarbonProperties.getInstance().getIntArray(tblProps.get(CarbonCommonConstants.TABLE_COMPACTION_LEVEL_THRESHOLD));
        // an empty table level value keeps the system level option read above
        if (0 != tableLevelsCount.length) {
            noOfSegmentLevelsCount = tableLevelsCount;
        }
    }
    int level1Size = 0;
    int level2Size = 0;
    int size = noOfSegmentLevelsCount.length;
    if (size >= 2) {
        level1Size = noOfSegmentLevelsCount[0];
        level2Size = noOfSegmentLevelsCount[1];
        /*
      Ex. if segs => 0.1,2,3 and threshold =2,1
      during 2nd time compaction,mergeCounter becomes 1 and we checks if mergeCounter==level2Size
      then return mergedSegments which will return 0.1 and since only 1 segment(0.1) is identified ,
      no segment would go for compaction .So change 2nd level threshold  to 0 if it is 1.
       */
        level2Size = level2Size == 1 ? 0 : level2Size;
    } else if (size == 1) {
        level1Size = noOfSegmentLevelsCount[0];
    }
    int unMergeCounter = 0;
    int mergeCounter = 0;
    CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
    for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) {
        SegmentStatus segmentStatus = segment.getSegmentStatus();
        // compaction should skip streaming segments; check this before the size lookup so that
        // no segment size is calculated for a segment that is skipped anyway
        if (segmentStatus == SegmentStatus.STREAMING || segmentStatus == SegmentStatus.STREAMING_FINISH) {
            continue;
        }
        // the segment size only matters when the size filter is enabled
        if (compactionSize > 0) {
            // check size of each segment , sum it up across partitions
            long sizeOfOneSegmentAcrossPartition;
            if (segment.getSegmentFile() != null) {
                // use the data size recorded on the segment when it is there,
                // otherwise calculate the size from the segment file
                if (!StringUtils.isEmpty(segment.getDataSize())) {
                    sizeOfOneSegmentAcrossPartition = Long.parseLong(segment.getDataSize());
                } else {
                    sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment(carbonTable.getTablePath(), new Segment(segment.getLoadName(), segment.getSegmentFile()));
                }
            } else {
                sizeOfOneSegmentAcrossPartition = getSizeOfSegment(carbonTable.getTablePath(), segment.getLoadName());
            }
            // segments that already reach the minor compaction size threshold are not compacted
            if (sizeOfOneSegmentAcrossPartition / (1024 * 1024) >= compactionSize) {
                continue;
            }
        }
        String segName = segment.getLoadName();
        // if a segment is already merged 2 or more levels (possible from custom compaction),
        // need to exclude those segments from minor compaction.
        // if a segment is major compacted then should not be considered for minor.
        boolean isMoreThanOrEqualsToLevel2 = false;
        if (segName.contains(".")) {
            // the number after the last '.' in the load name is compared against 2
            if (Integer.parseInt(segName.substring(segName.lastIndexOf(".") + 1)) >= 2) {
                isMoreThanOrEqualsToLevel2 = true;
            }
        }
        if (isMoreThanOrEqualsToLevel2 || (segment.isMajorCompacted() != null && segment.isMajorCompacted().equalsIgnoreCase("true"))) {
            continue;
        }
        // only successfully (or partially successfully) loaded segments are counted
        if (segmentStatus == SegmentStatus.SUCCESS || segmentStatus == SegmentStatus.LOAD_PARTIAL_SUCCESS) {
            if (!isMergedSegment(segName)) {
                // if it is an unmerged segment then increment counter
                unMergeCounter++;
                unMergedSegments.add(segment);
                // level 1 threshold reached: these unmerged segments go for compaction
                if (unMergeCounter == level1Size) {
                    return unMergedSegments;
                }
            } else {
                mergeCounter++;
                mergedSegments.add(segment);
                // level 2 threshold reached: these merged segments go for compaction
                if (mergeCounter == level2Size) {
                    return mergedSegments;
                }
            }
        }
    }
    // no level threshold was reached, nothing to compact
    return new ArrayList<>(0);
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) Segment(org.apache.carbondata.core.index.Segment)

Example 82 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonDataMergerUtil method identifySegmentsToBeMergedBasedOnSize.

/**
 * Picks the segments to merge for Major compaction by accumulating segment sizes.
 * Streaming segments are skipped. Segments are collected in list order while their summed
 * size stays below the threshold; the scan stops as soon as the threshold is hit and more
 * than one segment has already been collected.
 *
 * @param compactionSize compaction size in MB format
 * @param listOfSegmentsAfterPreserve  the segments list after
 *        preserving the configured number of latest loads
 * @param carbonLoadModel carbon load model
 * @return the list of segments that need to be merged
 *         based on the Size in case of Major compaction
 */
private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSize(long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve, CarbonLoadModel carbonLoadModel) throws IOException {
    final long thresholdInBytes = compactionSize * 1024 * 1024;
    final String tablePath = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getTablePath();
    List<LoadMetadataDetails> candidates = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    // running sum of the sizes of the segments currently held in candidates
    long accumulatedSize = 0;
    for (LoadMetadataDetails current : listOfSegmentsAfterPreserve) {
        SegmentStatus status = current.getSegmentStatus();
        // streaming segments never take part in compaction
        if (status == SegmentStatus.STREAMING || status == SegmentStatus.STREAMING_FINISH) {
            continue;
        }
        String loadName = current.getLoadName();
        // size of this one segment, summed across partitions
        long currentSize;
        if (current.getSegmentFile() == null) {
            currentSize = getSizeOfSegment(tablePath, loadName);
        } else if (StringUtils.isEmpty(current.getDataSize())) {
            // no data size recorded on the segment: calculate it from the segment file
            currentSize = CarbonUtil.getSizeOfSegment(tablePath, new Segment(loadName, current.getSegmentFile()));
        } else {
            currentSize = Long.parseLong(current.getDataSize());
        }
        // a single segment larger than the threshold is never merged
        if (currentSize > thresholdInBytes) {
            if (candidates.size() > 1) {
                // two or more segments are already collected, merge those
                break;
            }
            // at most one segment collected so far: drop it and start over after this one
            candidates.clear();
            accumulatedSize = 0;
            continue;
        }
        accumulatedSize += currentSize;
        if (accumulatedSize < thresholdInBytes) {
            // still below the threshold, keep collecting
            candidates.add(current);
            continue;
        }
        if (candidates.size() > 1) {
            // threshold reached with two or more segments collected, merge those
            break;
        }
        // at most one segment collected so far: restart the collection from this segment
        candidates.clear();
        candidates.add(current);
        accumulatedSize = currentSize;
    }
    return candidates;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) Segment(org.apache.carbondata.core.index.Segment)

Example 83 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonMetadataTest method testGetCarbonDimensionBasedOnColIdentifier.

/**
 * Verifies that looking up a dimension by column identifier "1" returns the visible
 * dimension whose column schema carries that unique id.
 */
@Test
public void testGetCarbonDimensionBasedOnColIdentifier() {
    CarbonTable carbonTable = CarbonTable.buildFromTableInfo(getTableInfo(1000L));
    String columnIdentifier = "1";
    final List<CarbonDimension> carbonDimensions = new ArrayList<>();
    ColumnSchema colSchema1 = new ColumnSchema();
    ColumnSchema colSchema2 = new ColumnSchema();
    colSchema1.setColumnUniqueId("1");
    colSchema2.setColumnUniqueId("2");
    carbonDimensions.add(new CarbonDimension(colSchema1, 1, 1, 1));
    carbonDimensions.add(new CarbonDimension(colSchema2, 2, 2, 2));
    // replace the table name and the visible dimensions reported by CarbonTable
    new MockUp<CarbonTable>() {

        @Mock
        public String getTableName() {
            return "carbonTestTable";
        }

        @Mock
        public List<CarbonDimension> getVisibleDimensions() {
            return carbonDimensions;
        }
    };
    // the first dimension is the one built from the schema with unique id "1"
    CarbonDimension expectedResult = carbonDimensions.get(0);
    CarbonDimension actualResult = carbonMetadata.getCarbonDimensionBasedOnColIdentifier(carbonTable, columnIdentifier);
    assertEquals(expectedResult, actualResult);
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) MockUp(mockit.MockUp) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) Test(org.junit.Test)

Example 84 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonMetadataTest method testGetCarbonDimensionBasedOnColIdentifierWhenChildDimensionColumnEqualsColumnIdentifier.

/**
 * Verifies that looking up column identifier "9" returns the child dimension carrying that
 * unique id when no visible top-level dimension matches it.
 */
@Test
public void testGetCarbonDimensionBasedOnColIdentifierWhenChildDimensionColumnEqualsColumnIdentifier() {
    CarbonTable carbonTable = CarbonTable.buildFromTableInfo(getTableInfo(1000L));
    String columnIdentifier = "9";
    // top-level dimension whose unique id ("1") differs from the identifier searched for
    final List<CarbonDimension> carbonDimensions = new ArrayList<>();
    ColumnSchema colSchema1 = new ColumnSchema();
    colSchema1.setColumnUniqueId("1");
    carbonDimensions.add(new CarbonDimension(colSchema1, 1, 1, 1));
    // child dimension whose unique id equals the identifier searched for
    final List<CarbonDimension> carbonChildDimensions = new ArrayList<>();
    ColumnSchema colSchema3 = new ColumnSchema();
    colSchema3.setColumnUniqueId("9");
    carbonChildDimensions.add(new CarbonDimension(colSchema3, 1, 1, 1));
    // replace the table name and the visible dimensions reported by CarbonTable
    new MockUp<CarbonTable>() {

        @Mock
        public String getTableName() {
            return "carbonTestTable";
        }

        @Mock
        public List<CarbonDimension> getVisibleDimensions() {
            return carbonDimensions;
        }
    };
    // make CarbonDimension report one child and hand out the prepared child list
    new MockUp<CarbonDimension>() {

        @Mock
        public int getNumberOfChild() {
            return 1;
        }

        @Mock
        public List<CarbonDimension> getListOfChildDimensions() {
            return carbonChildDimensions;
        }
    };
    CarbonDimension expectedResult = carbonChildDimensions.get(0);
    CarbonDimension actualResult = carbonMetadata.getCarbonDimensionBasedOnColIdentifier(carbonTable, columnIdentifier);
    assertEquals(expectedResult, actualResult);
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) MockUp(mockit.MockUp) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) Test(org.junit.Test)

Example 85 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonTableReader method updateSchemaTables.

/**
 * Refreshes the schema time recorded on the cache entry of the given table with the last
 * modified time of the table's schema file and, when the entry is no longer valid
 * afterwards, clears the indexes of that table.
 * Nothing is done when the table has no cache entry or the cached table is not transactional.
 *
 * @param schemaTableName key of the table in the carbon cache
 * @param config configuration used to access the schema file
 */
private void updateSchemaTables(SchemaTableName schemaTableName, Configuration config) {
    CarbonTableCacheModel cachedModel = carbonCache.get().get(schemaTableName);
    if (cachedModel == null) {
        return;
    }
    if (!cachedModel.getCarbonTable().isTransactionalTable()) {
        return;
    }
    CarbonTable cachedTable = cachedModel.getCarbonTable();
    // the schema file's last modified time becomes the entry's current schema time
    long schemaModifiedTime = FileFactory.getCarbonFile(CarbonTablePath.getSchemaFilePath(cachedTable.getTablePath()), config).getLastModifiedTime();
    cachedModel.setCurrentSchemaTime(schemaModifiedTime);
    if (cachedModel.isValid()) {
        return;
    }
    // Invalidate indexes
    IndexStoreManager.getInstance().clearIndex(cachedModel.getCarbonTable().getAbsoluteTableIdentifier());
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable)

Aggregations

CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)101 ArrayList (java.util.ArrayList)36 IOException (java.io.IOException)31 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)19 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)18 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)16 Configuration (org.apache.hadoop.conf.Configuration)15 TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo)14 Map (java.util.Map)13 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)13 List (java.util.List)12 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)12 HashMap (java.util.HashMap)11 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)11 File (java.io.File)9 Expression (org.apache.carbondata.core.scan.expression.Expression)9 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)8 CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit)8 InputSplit (org.apache.hadoop.mapreduce.InputSplit)8 Test (org.junit.Test)8