Search in sources :

Example 1 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

the class CarbonUpdateUtil method cleanStaleDeltaFiles.

/**
 * Cleans up the delta files left behind when an update fails.
 *
 * <p>Only partition "0" is considered. Each entry directly below that partition directory is
 * asked for the files whose names end with {@code timeStamp} followed by the update-delta,
 * update-index or delete-delta extension, and those files are handed to
 * {@code CarbonUtil.deleteFoldersAndFilesSilent}. Failures while deleting are logged and not
 * propagated; if the deletion is interrupted, the interrupt status of the current thread is
 * restored so that callers can still observe it.
 *
 * @param table table whose store location is scanned
 * @param timeStamp timestamp that prefixes the extension of the files to remove
 */
public static void cleanStaleDeltaFiles(CarbonTable table, final String timeStamp) {
    AbsoluteTableIdentifier absoluteTableIdentifier = table.getAbsoluteTableIdentifier();
    CarbonTablePath carbonTablePath = CarbonStorePath.getCarbonTablePath(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier());
    // as of now considering only partition 0.
    String partitionId = "0";
    String partitionDir = carbonTablePath.getPartitionDir(partitionId);
    CarbonFile file = FileFactory.getCarbonFile(partitionDir, FileFactory.getFileType(partitionDir));
    if (!file.exists()) {
        // nothing was written for this partition, so there is nothing to clean.
        return;
    }
    for (CarbonFile eachDir : file.listFiles()) {
        // for each dir check if the file with the delta timestamp is present or not.
        CarbonFile[] toBeDeleted = eachDir.listFiles(new CarbonFileFilter() {

            @Override
            public boolean accept(CarbonFile file) {
                String fileName = file.getName();
                return (fileName.endsWith(timeStamp + CarbonCommonConstants.UPDATE_DELTA_FILE_EXT) || fileName.endsWith(timeStamp + CarbonCommonConstants.UPDATE_INDEX_FILE_EXT) || fileName.endsWith(timeStamp + CarbonCommonConstants.DELETE_DELTA_FILE_EXT));
            }
        });
        // deleting the files of a segment.
        try {
            CarbonUtil.deleteFoldersAndFilesSilent(toBeDeleted);
        } catch (IOException e) {
            LOGGER.error("Exception in deleting the delta files." + e);
        } catch (InterruptedException e) {
            LOGGER.error("Exception in deleting the delta files." + e);
            // catching InterruptedException clears the interrupt flag; set it again so the
            // interruption is not lost for the code that called this method.
            Thread.currentThread().interrupt();
        }
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) IOException(java.io.IOException)

Example 2 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

the class BlockIndexStoreTest method testLoadAndGetTaskIdToSegmentsMapForSingleSegment.

/**
 * Loads the block of a single segment through the cache and checks that exactly one index is
 * returned. The cached blocks of that segment are removed afterwards, whether or not the
 * check passed.
 */
@Test
public void testLoadAndGetTaskIdToSegmentsMapForSingleSegment() throws IOException {
    File file = getPartFile();
    TableBlockInfo info = new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { "loclhost" }, file.length(), ColumnarFormatVersion.V1);
    CarbonTableIdentifier carbonTableIdentifier = new CarbonTableIdentifier(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "t3", "1");
    AbsoluteTableIdentifier absoluteTableIdentifier = new AbsoluteTableIdentifier("/src/test/resources", carbonTableIdentifier);
    try {
        List<TableBlockUniqueIdentifier> tableBlockInfoList = getTableBlockUniqueIdentifierList(Arrays.asList(new TableBlockInfo[] { info }), absoluteTableIdentifier);
        List<AbstractIndex> loadAndGetBlocks = cache.getAll(tableBlockInfoList);
        assertTrue(loadAndGetBlocks.size() == 1);
    } catch (Exception e) {
        // fail the test, but keep the original exception as the cause so the report shows
        // why loading failed instead of a bare assertion failure.
        throw new AssertionError("Loading the blocks of a single segment failed: " + e, e);
    } finally {
        // always drop the cached blocks so a failure here does not leak into other tests.
        List<String> segmentIds = new ArrayList<>();
        segmentIds.add(info.getSegmentId());
        cache.removeTableBlocks(segmentIds, absoluteTableIdentifier);
    }
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) TableBlockUniqueIdentifier(org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) ArrayList(java.util.ArrayList) File(java.io.File) IOException(java.io.IOException) Test(org.junit.Test)

Example 3 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

the class CarbonInputFormat method getSplits.

/**
 * {@inheritDoc}
 * The table path to read is taken from the FileInputFormat.INPUT_DIR configuration.
 *
 * @param job job context whose configuration describes the table, segments and filter
 * @return the splits to read, each one a CarbonInputSplit; empty when there is no valid
 *         segment or when the filter matches no partition
 * @throws IOException if the table cannot be resolved from the configuration, e.g. because
 *         the schema file is missing or corrupt
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
    try {
        List<String> staleSegmentIds = new ArrayList<>();
        List<UpdateVO> staleTimestampRanges = new ArrayList<>();
        // no segments configured yet: look up the valid ones and store them in the configuration
        if (getSegmentsToAccess(job).length == 0) {
            SegmentStatusManager statusManager = new SegmentStatusManager(identifier);
            SegmentStatusManager.ValidAndInvalidSegmentsInfo segmentsInfo = statusManager.getValidAndInvalidSegments();
            SegmentUpdateStatusManager updateManager = new SegmentUpdateStatusManager(identifier);
            setSegmentsToAccess(job.getConfiguration(), segmentsInfo.getValidSegments());
            if (segmentsInfo.getValidSegments().size() == 0) {
                return new ArrayList<>(0);
            }
            // collect the invalid segments together with their invalid timestamp ranges
            staleSegmentIds.addAll(segmentsInfo.getInvalidSegments());
            for (String staleId : staleSegmentIds) {
                staleTimestampRanges.add(updateManager.getInvalidTimestampRange(staleId));
            }
            // drop the segment index entries of the invalid segments
            if (!staleSegmentIds.isEmpty()) {
                List<TableSegmentUniqueIdentifier> staleIdentifiers = new ArrayList<>(staleSegmentIds.size());
                for (String staleId : staleSegmentIds) {
                    staleIdentifiers.add(new TableSegmentUniqueIdentifier(identifier, staleId));
                }
                cacheClient.getSegmentAccessClient().invalidateAll(staleIdentifiers);
            }
        }
        // fetch the filter expression and prepare it against the table
        Expression filter = getFilterPredicates(job.getConfiguration());
        CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
        // a null table means the schema file could not be read
        if (carbonTable == null) {
            throw new IOException("Missing/Corrupt schema file for table.");
        }
        CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
        // partition pruning only applies to a filter query on a partitioned table
        BitSet matchedPartitions = null;
        PartitionInfo partitionInfo = (filter == null) ? null : carbonTable.getPartitionInfo(carbonTable.getFactTableName());
        if (partitionInfo != null) {
            Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
            matchedPartitions = new FilterExpressionProcessor().getFilteredPartitions(filter, partitionInfo, partitioner);
            int matchedCount = matchedPartitions.cardinality();
            if (matchedCount == 0) {
                // the filter selects no partition at all
                return new ArrayList<>();
            }
            if (matchedCount == partitioner.numPartitions()) {
                // every partition is selected, so pruning would be a no-op
                matchedPartitions = null;
            }
        }
        FilterResolverIntf resolvedFilter = CarbonInputFormatUtil.resolveFilter(filter, identifier);
        // block level filtering produces the actual splits
        List<InputSplit> splits = getSplits(job, resolvedFilter, matchedPartitions, cacheClient);
        // hand the invalid segments to the task side so it can remove its index entries too
        if (!staleSegmentIds.isEmpty()) {
            for (InputSplit split : splits) {
                CarbonInputSplit carbonSplit = (CarbonInputSplit) split;
                carbonSplit.setInvalidSegments(staleSegmentIds);
                carbonSplit.setInvalidTimestampRange(staleTimestampRanges);
            }
        }
        return splits;
    } finally {
        // closing the cache client clears the LRU cache memory
        cacheClient.close();
    }
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Partitioner(org.apache.carbondata.core.scan.partition.Partitioner) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Example 4 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

the class CarbonInputFormat method getQueryModel.

/**
 * Builds the query model for the given split from the job configuration.
 *
 * <p>The model is created from the configured table and column projection, then receives the
 * resolved filter. When the split is a CarbonMultiBlockSplit, the invalid segments and
 * invalid timestamp ranges carried by its first child split are copied into the model.
 *
 * @param inputSplit split that is going to be read
 * @param taskAttemptContext task context providing the configuration
 * @return the populated query model
 * @throws IOException declared by the signature; raised by the helpers this method calls
 */
public QueryModel getQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    Configuration conf = taskAttemptContext.getConfiguration();
    CarbonTable table = getCarbonTable(conf);
    // take the identifier from the table itself to avoid unnecessary deserialization
    AbsoluteTableIdentifier tableIdentifier = table.getAbsoluteTableIdentifier();
    // the query plan carries the projection columns
    CarbonQueryPlan plan = CarbonInputFormatUtil.createQueryPlan(table, getColumnProjection(conf));
    QueryModel model = QueryModel.createModel(tableIdentifier, plan, table);
    // attach the filter so blocklets can be filtered before the scan
    Expression filterExpression = getFilterPredicates(conf);
    CarbonInputFormatUtil.processFilterExpression(filterExpression, table);
    model.setFilterExpressionResolverTree(CarbonInputFormatUtil.resolveFilter(filterExpression, tableIdentifier));
    if (!(inputSplit instanceof CarbonMultiBlockSplit)) {
        return model;
    }
    // the first child split carries the invalid segment information for the file level index store
    CarbonMultiBlockSplit multiSplit = (CarbonMultiBlockSplit) inputSplit;
    List<String> staleSegmentIds = multiSplit.getAllSplits().get(0).getInvalidSegments();
    if (!staleSegmentIds.isEmpty()) {
        model.setInvalidSegmentIds(staleSegmentIds);
    }
    List<UpdateVO> staleTimestampRanges = multiSplit.getAllSplits().get(0).getInvalidTimestampRange();
    if (staleTimestampRanges != null && !staleTimestampRanges.isEmpty()) {
        model.setInvalidBlockForSegmentId(staleTimestampRanges);
    }
    return model;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonQueryPlan(org.apache.carbondata.core.scan.model.CarbonQueryPlan) Configuration(org.apache.hadoop.conf.Configuration) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) QueryModel(org.apache.carbondata.core.scan.model.QueryModel) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Example 5 with AbsoluteTableIdentifier

use of org.apache.carbondata.core.metadata.AbsoluteTableIdentifier in project carbondata by apache.

the class CarbonDataMergerUtil method updateMajorCompactionPropertyInSegment.

/**
 * Marks segments as major compacted and writes the table status file.
 *
 * <p>The load metadata is read from the table's metadata path, every entry of
 * {@code changedSegDetails} that is not contained in {@code preservedSegment} is flagged via
 * {@code setMajorCompacted("true")}, and the result is written to the table status file while
 * the table status lock is held.
 *
 * @param model load model identifying the table
 * @param changedSegDetails segments to flag as major compacted
 * @param preservedSegment segments that must be left untouched even if listed in
 *        {@code changedSegDetails}
 * @throws Exception if the table status lock cannot be obtained, or if writing the table
 *         status fails (the underlying IOException is attached as the cause)
 */
public static void updateMajorCompactionPropertyInSegment(CarbonLoadModel model, List<LoadMetadataDetails> changedSegDetails, List<LoadMetadataDetails> preservedSegment) throws Exception {
    String metadataPath = model.getCarbonDataLoadSchema().getCarbonTable().getMetaDataFilepath();
    AbsoluteTableIdentifier absoluteTableIdentifier = model.getCarbonDataLoadSchema().getCarbonTable().getAbsoluteTableIdentifier();
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
    LoadMetadataDetails[] details = segmentStatusManager.readLoadMetadata(metadataPath);
    // fixed-size view over the array: elements are mutated in place, none are added or removed
    List<LoadMetadataDetails> originalList = Arrays.asList(details);
    for (LoadMetadataDetails segment : changedSegDetails) {
        if (preservedSegment.contains(segment)) {
            continue;
        }
        // NOTE(review): if the segment is absent from the table status, indexOf yields -1 and
        // get(-1) throws an unchecked exception - confirm callers only pass known segments.
        originalList.get(originalList.indexOf(segment)).setMajorCompacted("true");
    }
    ICarbonLock carbonTableStatusLock = CarbonLockFactory.getCarbonLockObj(model.getCarbonDataLoadSchema().getCarbonTable().getCarbonTableIdentifier(), LockUsage.TABLE_STATUS_LOCK);
    try {
        if (carbonTableStatusLock.lockWithRetries()) {
            LOGGER.info("Acquired lock for the table " + model.getDatabaseName() + "." + model.getTableName() + " for table status updation ");
            CarbonTablePath carbonTablePath = CarbonStorePath.getCarbonTablePath(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier());
            segmentStatusManager.writeLoadDetailsIntoFile(carbonTablePath.getTableStatusFilePath(), originalList.toArray(new LoadMetadataDetails[originalList.size()]));
        } else {
            LOGGER.error("Could not able to obtain lock for table" + model.getDatabaseName() + "." + model.getTableName() + "for table status updation");
            throw new Exception("Failed to update the MajorCompactionStatus.");
        }
    } catch (IOException e) {
        LOGGER.error("Error while writing metadata");
        // chain the IOException so its stack trace is not lost to the caller
        throw new Exception("Failed to update the MajorCompactionStatus." + e.getMessage(), e);
    } finally {
        // unlock is attempted on every path, including when the lock was not obtained
        if (carbonTableStatusLock.unlock()) {
            LOGGER.info("Table unlocked successfully after table status updation" + model.getDatabaseName() + "." + model.getTableName());
        } else {
            LOGGER.error("Unable to unlock Table lock for table" + model.getDatabaseName() + "." + model.getTableName() + " during table status updation");
        }
    }
}
Also used : ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) ParseException(java.text.ParseException) IOException(java.io.IOException)

Aggregations

AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 26, IOException (java.io.IOException): 13, ArrayList (java.util.ArrayList): 11, CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 10, SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager): 8, ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock): 7, LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 7, TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 6, CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier): 6, File (java.io.File): 5, CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 5, SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager): 5, FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor): 4, FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf): 4, Path (org.apache.hadoop.fs.Path): 4, Test (org.junit.Test): 4, AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex): 3, TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier): 3, BlockBTreeLeafNode (org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode): 3, UpdateVO (org.apache.carbondata.core.mutate.UpdateVO): 3