Example 66 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonDataMergerUtil method identifySegmentsToBeMergedBasedOnSize.

/**
 * Identify the segments to be merged based on size, in the case of major compaction.
 *
 * @param compactionSize compaction size in MB
 * @param listOfSegmentsAfterPreserve  the segments list after
 *        preserving the configured number of latest loads
 * @param carbonLoadModel carbon load model
 * @param tablePath the store location of the segment
 * @return the list of segments that need to be merged,
 *         based on size, in the case of major compaction
 */
private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSize(long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve, CarbonLoadModel carbonLoadModel, String tablePath) throws IOException {
    List<LoadMetadataDetails> segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
    // total length
    long totalLength = 0;
    // check the size of each segment and sum it up across partitions
    for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) {
        // compaction should skip streaming segments
        if (segment.getSegmentStatus() == SegmentStatus.STREAMING || segment.getSegmentStatus() == SegmentStatus.STREAMING_FINISH) {
            continue;
        }
        String segId = segment.getLoadName();
        // variable to store the size of one segment across partitions.
        long sizeOfOneSegmentAcrossPartition;
        if (segment.getSegmentFile() != null) {
            sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment(carbonTable.getTablePath(), new Segment(segId, segment.getSegmentFile()));
        } else {
            sizeOfOneSegmentAcrossPartition = getSizeOfSegment(carbonTable.getTablePath(), segId);
        }
        // if the size of a segment is greater than the major compaction size, then skip it.
        if (sizeOfOneSegmentAcrossPartition > (compactionSize * 1024 * 1024)) {
            // if 2 segments have already been found for merging, stop the scan here and merge.
            if (segmentsToBeMerged.size() > 1) {
                break;
            } else {
                // if at most one segment has been found so far, drop it from the list
                // and reset the total length to 0.
                segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
                totalLength = 0;
                continue;
            }
        }
        totalLength += sizeOfOneSegmentAcrossPartition;
        // add the segment only while the accumulated size stays below the major compaction size.
        if (totalLength < (compactionSize * 1024 * 1024)) {
            segmentsToBeMerged.add(segment);
        } else {
            // if 2 segments have already been found for merging, stop the scan here and merge.
            if (segmentsToBeMerged.size() > 1) {
                break;
            } else {
                // if at most one segment has been found so far, replace the list with this
                // segment and reset the total length to the size of the current segment.
                segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
                segmentsToBeMerged.add(segment);
                totalLength = sizeOfOneSegmentAcrossPartition;
            }
        }
    }
    return segmentsToBeMerged;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) Segment(org.apache.carbondata.core.datamap.Segment)
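
The selection loop above is easier to verify in isolation. Below is a minimal, self-contained sketch of the same size-accumulation logic using plain JDK types; SegmentInfo and thresholdBytes are hypothetical stand-ins for LoadMetadataDetails and compactionSize * 1024 * 1024, not CarbonData APIs.

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for LoadMetadataDetails: just a name and a size in bytes.
record SegmentInfo(String name, long sizeInBytes) {}

public class SizeBasedSelectionSketch {

    // Mirrors the loop above: accumulate segments until the threshold is
    // crossed, skipping any single segment that is already over the limit.
    static List<SegmentInfo> select(List<SegmentInfo> segments, long thresholdBytes) {
        List<SegmentInfo> selected = new ArrayList<>();
        long total = 0;
        for (SegmentInfo segment : segments) {
            if (segment.sizeInBytes() > thresholdBytes) {
                if (selected.size() > 1) {
                    break;                        // enough segments found; merge them
                }
                selected.clear();                 // restart the scan window
                total = 0;
                continue;
            }
            total += segment.sizeInBytes();
            if (total < thresholdBytes) {
                selected.add(segment);
            } else if (selected.size() > 1) {
                break;
            } else {
                selected.clear();                 // keep only the current segment
                selected.add(segment);
                total = segment.sizeInBytes();
            }
        }
        return selected;
    }

    public static void main(String[] args) {
        List<SegmentInfo> segments = List.of(
                new SegmentInfo("0", 40L << 20),
                new SegmentInfo("1", 30L << 20),
                new SegmentInfo("2", 200L << 20),   // oversized, skipped
                new SegmentInfo("3", 20L << 20));
        // 100 MB threshold, analogous to compactionSize * 1024 * 1024;
        // selects segments 0 and 1, then the oversized segment ends the scan.
        System.out.println(select(segments, 100L << 20));
    }
}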

Example 67 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonTableInputFormat method getSplitsOfOneSegment.

/**
 * Read data in one segment. Used for the alter table partition statement.
 * @param job the job context
 * @param targetSegment the segment to read
 * @param oldPartitionIdList the old partition ids from before partitionInfo was changed
 * @param partitionInfo the current partition info of the table
 * @return the input splits of the target segment
 */
public List<InputSplit> getSplitsOfOneSegment(JobContext job, String targetSegment, List<Integer> oldPartitionIdList, PartitionInfo partitionInfo) {
    List<Segment> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    List<Segment> segmentList = new ArrayList<>();
    segmentList.add(new Segment(targetSegment, null));
    setSegmentsToAccess(job.getConfiguration(), segmentList);
    try {
        // process and resolve the expression
        Expression filter = getFilterPredicates(job.getConfiguration());
        CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
        // this will be null in case of corrupt schema file.
        if (null == carbonTable) {
            throw new IOException("Missing/Corrupt schema file for table.");
        }
        carbonTable.processFilterExpression(filter, null, null);
        TableProvider tableProvider = new SingleTableProvider(carbonTable);
        // prune partitions for filter query on partition table
        String partitionIds = job.getConfiguration().get(ALTER_PARTITION_ID);
        // matchedPartitions records partitionIndex, not partitionId
        BitSet matchedPartitions = null;
        if (partitionInfo != null) {
            matchedPartitions = setMatchedPartitions(partitionIds, filter, partitionInfo, oldPartitionIdList);
            if (matchedPartitions != null) {
                if (matchedPartitions.cardinality() == 0) {
                    return new ArrayList<InputSplit>();
                } else if (matchedPartitions.cardinality() == partitionInfo.getNumPartitions()) {
                    matchedPartitions = null;
                }
            }
        }
        FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter, tableProvider);
        // do block filtering and get split
        List<InputSplit> splits = getSplits(job, filterInterface, segmentList, matchedPartitions, partitionInfo, oldPartitionIdList, new SegmentUpdateStatusManager(carbonTable));
        // pass the invalid segments to the task side so their index entries can be removed there
        if (invalidSegments.size() > 0) {
            for (InputSplit split : splits) {
                ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
                ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
            }
        }
        return splits;
    } catch (IOException e) {
        throw new RuntimeException("Can't get splits of the target segment ", e);
    }
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) ArrayList(java.util.ArrayList) BitSet(java.util.BitSet) IOException(java.io.IOException) TableProvider(org.apache.carbondata.core.scan.filter.TableProvider) SingleTableProvider(org.apache.carbondata.core.scan.filter.SingleTableProvider) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) Segment(org.apache.carbondata.core.datamap.Segment) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) SingleTableProvider(org.apache.carbondata.core.scan.filter.SingleTableProvider) Expression(org.apache.carbondata.core.scan.expression.Expression) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
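
The matchedPartitions handling above has three outcomes: no partitions matched (return an empty split list), every partition matched (pruning disabled by nulling the BitSet), or a genuine subset. A JDK-only sketch of that decision, with numPartitions as a hypothetical stand-in for partitionInfo.getNumPartitions():

import java.util.BitSet;
import java.util.Optional;

public class PartitionPruneSketch {

    // Mirrors the cardinality checks above. An empty Optional means
    // "no pruning needed" (all partitions matched, equivalent to
    // matchedPartitions = null); an empty BitSet inside the Optional
    // means "nothing matched, short-circuit to an empty result".
    static Optional<BitSet> prune(BitSet matched, int numPartitions) {
        if (matched.cardinality() == numPartitions) {
            return Optional.empty();
        }
        return Optional.of(matched);
    }

    public static void main(String[] args) {
        BitSet matched = new BitSet();
        matched.set(0);
        matched.set(2);
        System.out.println(prune(matched, 4));   // subset {0, 2}: prune splits
        matched.set(1);
        matched.set(3);
        System.out.println(prune(matched, 4));   // all matched: empty, scan everything
    }
}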

Example 68 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class StoreCreator method createTableAndLoadModel.

public static CarbonLoadModel createTableAndLoadModel() throws Exception {
    String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
    File storeDir = new File(storePath);
    CarbonUtil.deleteFoldersAndFiles(storeDir);
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, storePath);
    CarbonTable table = createTable(absoluteTableIdentifier);
    writeDictionary(factFilePath, table);
    return buildCarbonLoadModel(table, factFilePath, absoluteTableIdentifier);
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) File(java.io.File)
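
A typical way to exercise this helper from a test driver is sketched below; the CarbonLoadModel import path and the visibility of StoreCreator from the caller's package are assumptions for illustration, not confirmed by this example.

// Hypothetical test driver; import paths are assumptions.
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;

public class LoadModelDemo {
    public static void main(String[] args) throws Exception {
        // Cleans the store directory, creates the table schema, writes the
        // dictionary for data.csv, and returns a ready-to-use load model.
        // Assumes StoreCreator is accessible from this package.
        CarbonLoadModel loadModel = StoreCreator.createTableAndLoadModel();
        System.out.println("Load model built for table: " + loadModel.getTableName());
    }
}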

Example 69 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbondataMetadata method getTableMetadata.

private ConnectorTableMetadata getTableMetadata(SchemaTableName schemaTableName) {
    if (!listSchemaNamesInternal().contains(schemaTableName.getSchemaName())) {
        throw new SchemaNotFoundException(schemaTableName.getSchemaName());
    }
    CarbonTable carbonTable = carbonTableReader.getTable(schemaTableName);
    List<ColumnMetadata> columnsMetaList = new LinkedList<>();
    List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn(schemaTableName.getTableName());
    for (CarbonColumn col : carbonColumns) {
        // the SHOW COLUMNS command will return this data
        Type columnType = carbonDataType2SpiMapper(col.getColumnSchema());
        ColumnMetadata columnMeta = new ColumnMetadata(col.getColumnSchema().getColumnName(), columnType);
        columnsMetaList.add(columnMeta);
    }
    // carbondata connector's table metadata
    return new ConnectorTableMetadata(schemaTableName, columnsMetaList);
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) DataType(org.apache.carbondata.core.metadata.datatype.DataType) Types.checkType(org.apache.carbondata.presto.Types.checkType) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)
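
The loop above is a plain column-to-metadata projection. A JDK-only sketch of the same shape, where Column, ColumnMeta, and mapType are hypothetical stand-ins for CarbonColumn, ColumnMetadata, and carbonDataType2SpiMapper:

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-ins for CarbonColumn and the connector's column metadata.
record Column(String name, String carbonType) {}
record ColumnMeta(String name, String spiType) {}

public class TableMetadataSketch {

    // Stand-in for carbonDataType2SpiMapper: map a storage type name
    // to the connector's SPI type name.
    static String mapType(String carbonType) {
        return switch (carbonType) {
            case "STRING" -> "varchar";
            case "INT" -> "integer";
            case "DOUBLE" -> "double";
            default -> "varchar";
        };
    }

    static List<ColumnMeta> toMetadata(List<Column> columns) {
        List<ColumnMeta> metas = new ArrayList<>();
        for (Column col : columns) {
            metas.add(new ColumnMeta(col.name(), mapType(col.carbonType())));
        }
        return metas;
    }

    public static void main(String[] args) {
        System.out.println(toMetadata(List.of(
                new Column("id", "INT"), new Column("name", "STRING"))));
    }
}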

Example 70 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonTableReader method getInputSplits2.

public List<CarbonLocalInputSplit> getInputSplits2(CarbonTableCacheModel tableCacheModel, Expression filters) {
    List<CarbonLocalInputSplit> result = new ArrayList<>();
    if (config.getUnsafeMemoryInMb() != null) {
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB, config.getUnsafeMemoryInMb());
    }
    CarbonTable carbonTable = tableCacheModel.carbonTable;
    TableInfo tableInfo = tableCacheModel.carbonTable.getTableInfo();
    // note: this local Hadoop Configuration is distinct from the reader's
    // 'config' field used above; from here on, 'config' refers to the Hadoop one
    Configuration config = new Configuration();
    config.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
    String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
    config.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
    config.set(CarbonTableInputFormat.DATABASE_NAME, carbonTable.getDatabaseName());
    config.set(CarbonTableInputFormat.TABLE_NAME, carbonTable.getTableName());
    try {
        CarbonTableInputFormat.setTableInfo(config, tableInfo);
        CarbonTableInputFormat carbonTableInputFormat = createInputFormat(config, carbonTable.getAbsoluteTableIdentifier(), filters);
        JobConf jobConf = new JobConf(config);
        Job job = Job.getInstance(jobConf);
        List<InputSplit> splits = carbonTableInputFormat.getSplits(job);
        CarbonInputSplit carbonInputSplit = null;
        Gson gson = new Gson();
        if (splits != null && splits.size() > 0) {
            for (InputSplit inputSplit : splits) {
                carbonInputSplit = (CarbonInputSplit) inputSplit;
                result.add(new CarbonLocalInputSplit(carbonInputSplit.getSegmentId(), carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(), carbonInputSplit.getLength(), Arrays.asList(carbonInputSplit.getLocations()), carbonInputSplit.getNumberOfBlocklets(), carbonInputSplit.getVersion().number(), carbonInputSplit.getDeleteDeltaFiles(), gson.toJson(carbonInputSplit.getDetailInfo())));
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Error creating Splits from CarbonTableInputFormat", e);
    }
    return result;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Gson(com.facebook.presto.hadoop.$internal.com.google.gson.Gson) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) IOException(java.io.IOException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonTableInputFormat(org.apache.carbondata.hadoop.api.CarbonTableInputFormat) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit)
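
Note that the Gson import above is Presto's shaded internal copy; functionally it is the standard com.google.gson API. A minimal sketch of the toJson round trip used for the split detail info, with SplitDetail as a hypothetical stand-in for the real detail object:

import com.google.gson.Gson;

public class SplitJsonSketch {

    // Hypothetical stand-in for the per-split detail info carried in CarbonInputSplit.
    static class SplitDetail {
        String segmentId;
        long start;
        long length;

        SplitDetail(String segmentId, long start, long length) {
            this.segmentId = segmentId;
            this.start = start;
            this.length = length;
        }
    }

    public static void main(String[] args) {
        Gson gson = new Gson();
        // Serialize for transport inside CarbonLocalInputSplit, as above...
        String json = gson.toJson(new SplitDetail("0", 0L, 1024L));
        System.out.println(json);   // {"segmentId":"0","start":0,"length":1024}
        // ...and deserialize on the consumer side.
        SplitDetail back = gson.fromJson(json, SplitDetail.class);
        System.out.println(back.segmentId + " @ " + back.start + "+" + back.length);
    }
}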

Aggregations

CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 101 usages
ArrayList (java.util.ArrayList): 36 usages
IOException (java.io.IOException): 31 usages
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 19 usages
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 18 usages
ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema): 16 usages
Configuration (org.apache.hadoop.conf.Configuration): 15 usages
TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo): 14 usages
Map (java.util.Map): 13 usages
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 13 usages
List (java.util.List): 12 usages
CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension): 12 usages
HashMap (java.util.HashMap): 11 usages
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 11 usages
File (java.io.File): 9 usages
Expression (org.apache.carbondata.core.scan.expression.Expression): 9 usages
PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec): 8 usages
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 8 usages
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 8 usages
Test (org.junit.Test): 8 usages