
Example 1 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonUpdateUtil method updateTableMetadataStatus.

/**
 * Updates the table status file after an update or delete operation.
 *
 * @param updatedSegmentsList segments touched by the update operation
 * @param table the carbon table whose status file is being updated
 * @param updatedTimeStamp timestamp of the current update operation
 * @param isTimestampUpdationRequired whether update delta timestamps and
 *        delete markers should be written
 * @param segmentsToBeDeleted segments to be marked for delete
 * @param segmentFilesTobeUpdated segments whose segment file entries need to be rewritten
 * @return true if the table status file was written successfully, false otherwise
 */
public static boolean updateTableMetadataStatus(Set<Segment> updatedSegmentsList, CarbonTable table, String updatedTimeStamp, boolean isTimestampUpdationRequired, List<Segment> segmentsToBeDeleted, List<Segment> segmentFilesTobeUpdated) {
    boolean status = false;
    String metaDataFilepath = table.getMetadataPath();
    AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
    String tableStatusPath = CarbonTablePath.getTableStatusFilePath(identifier.getTablePath());
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
    ICarbonLock carbonLock = segmentStatusManager.getTableStatusLock();
    boolean lockStatus = false;
    try {
        lockStatus = carbonLock.lockWithRetries();
        if (lockStatus) {
            LOGGER.info("Acquired lock for table " + table.getDatabaseName() + "." + table.getTableName() + " for table status update");
            LoadMetadataDetails[] listOfLoadFolderDetailsArray = SegmentStatusManager.readLoadMetadata(metaDataFilepath);
            for (LoadMetadataDetails loadMetadata : listOfLoadFolderDetailsArray) {
                if (isTimestampUpdationRequired) {
                    // the link between the two status files is stored in segment 0 only.
                    if (loadMetadata.getLoadName().equalsIgnoreCase("0")) {
                        loadMetadata.setUpdateStatusFileName(CarbonUpdateUtil.getUpdateStatusFileName(updatedTimeStamp));
                    }
                    // if the segment is in the marked-for-delete list then update its status.
                    if (segmentsToBeDeleted.contains(new Segment(loadMetadata.getLoadName(), null))) {
                        loadMetadata.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE);
                        loadMetadata.setModificationOrdeletionTimesStamp(Long.parseLong(updatedTimeStamp));
                    }
                }
                for (Segment segName : updatedSegmentsList) {
                    if (loadMetadata.getLoadName().equalsIgnoreCase(segName.getSegmentNo())) {
                        // if the timestamp string comes empty then there is no need to write it into the table status file.
                        if (isTimestampUpdationRequired) {
                            loadMetadata.setIsDeleted(CarbonCommonConstants.KEYWORD_TRUE);
                            // in case of the update flow:
                            if (loadMetadata.getUpdateDeltaStartTimestamp().isEmpty()) {
                                // this means it is getting updated for the first time.
                                loadMetadata.setUpdateDeltaStartTimestamp(updatedTimeStamp);
                            }
                            // update the end timestamp each time.
                            loadMetadata.setUpdateDeltaEndTimestamp(updatedTimeStamp);
                        }
                        if (segmentFilesTobeUpdated.contains(Segment.toSegment(loadMetadata.getLoadName()))) {
                            loadMetadata.setSegmentFile(loadMetadata.getLoadName() + "_" + updatedTimeStamp + CarbonTablePath.SEGMENT_EXT);
                        }
                    }
                }
            }
            try {
                segmentStatusManager.writeLoadDetailsIntoFile(tableStatusPath, listOfLoadFolderDetailsArray);
            } catch (IOException e) {
                return false;
            }
            status = true;
        } else {
            LOGGER.error("Unable to acquire the lock for table status update for table " + table.getDatabaseName() + "." + table.getTableName());
        }
    } finally {
        if (lockStatus) {
            if (carbonLock.unlock()) {
                LOGGER.info("Table unlocked successfully after table status update " + table.getDatabaseName() + "." + table.getTableName());
            } else {
                LOGGER.error("Unable to unlock table lock for table " + table.getDatabaseName() + "." + table.getTableName() + " during table status update");
            }
        }
    }
    return status;
}
Also used: ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) Segment(org.apache.carbondata.core.datamap.Segment)
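
A minimal caller sketch for the method above, not taken from the project: the CarbonTable named table and the LOGGER are assumed to come from the calling class, and the segment numbers and timestamp are illustrative placeholders.

// Hypothetical driver: mark segment "2" for delete and refresh the update
// delta timestamps of segments "1" and "3".
Set<Segment> updatedSegments = new HashSet<>();
updatedSegments.add(new Segment("1", null));
updatedSegments.add(new Segment("3", null));
List<Segment> segmentsToBeDeleted = new ArrayList<>();
segmentsToBeDeleted.add(new Segment("2", null));
String updateTimestamp = String.valueOf(System.currentTimeMillis());
boolean written = CarbonUpdateUtil.updateTableMetadataStatus(
    updatedSegments, table, updateTimestamp,
    // write update delta timestamps and delete markers
    true,
    segmentsToBeDeleted,
    // no segment files need rewriting in this sketch
    new ArrayList<Segment>());
if (!written) {
    // the table status lock could not be acquired or the write failed
    LOGGER.error("table status update failed for " + table.getTableName());
}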

Example 2 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonUpdateUtil method cleanUpDeltaFiles.

/**
 * Handles the clean up of old carbondata files, index files, delete delta
 * files and update status files.
 * @param table clean up will be handled on this table.
 * @param forceDelete if true then the max query execution timeout will not be considered.
 */
public static void cleanUpDeltaFiles(CarbonTable table, boolean forceDelete) throws IOException {
    SegmentStatusManager ssm = new SegmentStatusManager(table.getAbsoluteTableIdentifier());
    LoadMetadataDetails[] details = SegmentStatusManager.readLoadMetadata(table.getMetadataPath());
    String validUpdateStatusFile = "";
    boolean isAbortedFile = true;
    boolean isInvalidFile = false;
    List<Segment> segmentFilesToBeUpdated = new ArrayList<>();
    for (LoadMetadataDetails segment : details) {
        // take the update status file name from the 0th segment.
        validUpdateStatusFile = ssm.getUpdateStatusFileName(details);
        if (segment.getSegmentStatus() == SegmentStatus.SUCCESS || segment.getSegmentStatus() == SegmentStatus.LOAD_PARTIAL_SUCCESS) {
            // take the list of files from this segment.
            String segmentPath = CarbonTablePath.getSegmentPath(table.getAbsoluteTableIdentifier().getTablePath(), segment.getLoadName());
            CarbonFile segDir = FileFactory.getCarbonFile(segmentPath, FileFactory.getFileType(segmentPath));
            CarbonFile[] allSegmentFiles = segDir.listFiles();
            // scan through the segment and find the carbondata files and index files.
            SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table);
            boolean updateSegmentFile = false;
            // handle deletion of files from the aborted-load scenario.
            if (deleteStaleCarbonDataFiles(segment, allSegmentFiles, updateStatusManager)) {
                updateSegmentFile = true;
            }
            // get the invalid update delta files.
            CarbonFile[] invalidUpdateDeltaFiles = updateStatusManager.getUpdateDeltaFilesList(segment.getLoadName(), false, CarbonCommonConstants.UPDATE_DELTA_FILE_EXT, true, allSegmentFiles, isInvalidFile);
            // and then delete them.
            for (CarbonFile invalidFile : invalidUpdateDeltaFiles) {
                compareTimestampsAndDelete(invalidFile, forceDelete, false);
            }
            // do the same for the index files.
            CarbonFile[] invalidIndexFiles = updateStatusManager.getUpdateDeltaFilesList(segment.getLoadName(), false, CarbonCommonConstants.UPDATE_INDEX_FILE_EXT, true, allSegmentFiles, isInvalidFile);
            for (CarbonFile invalidFile : invalidIndexFiles) {
                if (compareTimestampsAndDelete(invalidFile, forceDelete, false)) {
                    updateSegmentFile = true;
                }
            }
            // now handle all the delete delta files which need to be deleted.
            // there are 2 cases here:
            // 1. if the block is marked as compacted then the corresponding delta files
            //    can be deleted once the query execution timeout has elapsed.
            // 2. if the block is in success state, a delete delta compaction may still
            //    have happened and the old files can be deleted.
            SegmentUpdateDetails[] updateDetails = updateStatusManager.readLoadMetadata();
            for (SegmentUpdateDetails block : updateDetails) {
                CarbonFile[] completeListOfDeleteDeltaFiles;
                CarbonFile[] invalidDeleteDeltaFiles;
                if (!block.getSegmentName().equalsIgnoreCase(segment.getLoadName())) {
                    continue;
                }
                // aborted scenario.
                invalidDeleteDeltaFiles = updateStatusManager.getDeleteDeltaInvalidFilesList(block, false, allSegmentFiles, isAbortedFile);
                for (CarbonFile invalidFile : invalidDeleteDeltaFiles) {
                    boolean doForceDelete = true;
                    compareTimestampsAndDelete(invalidFile, doForceDelete, false);
                }
                // case 1
                if (CarbonUpdateUtil.isBlockInvalid(block.getSegmentStatus())) {
                    completeListOfDeleteDeltaFiles = updateStatusManager.getDeleteDeltaInvalidFilesList(block, true, allSegmentFiles, isInvalidFile);
                    for (CarbonFile invalidFile : completeListOfDeleteDeltaFiles) {
                        compareTimestampsAndDelete(invalidFile, forceDelete, false);
                    }
                    CarbonFile[] blockRelatedFiles = updateStatusManager.getAllBlockRelatedFiles(allSegmentFiles, block.getActualBlockName());
                    for (CarbonFile invalidFile : blockRelatedFiles) {
                        if (compareTimestampsAndDelete(invalidFile, forceDelete, false)) {
                            if (invalidFile.getName().endsWith(CarbonCommonConstants.UPDATE_INDEX_FILE_EXT)) {
                                updateSegmentFile = true;
                            }
                        }
                    }
                } else {
                    invalidDeleteDeltaFiles = updateStatusManager.getDeleteDeltaInvalidFilesList(block, false, allSegmentFiles, isInvalidFile);
                    for (CarbonFile invalidFile : invalidDeleteDeltaFiles) {
                        compareTimestampsAndDelete(invalidFile, forceDelete, false);
                    }
                }
            }
            if (updateSegmentFile) {
                segmentFilesToBeUpdated.add(Segment.toSegment(segment.getLoadName()));
            }
        }
    }
    String UUID = String.valueOf(System.currentTimeMillis());
    List<Segment> segmentFilesToBeUpdatedLatest = new ArrayList<>();
    for (Segment segment : segmentFilesToBeUpdated) {
        String file = SegmentFileStore.writeSegmentFile(table.getTablePath(), segment.getSegmentNo(), UUID);
        segmentFilesToBeUpdatedLatest.add(new Segment(segment.getSegmentNo(), file));
    }
    if (segmentFilesToBeUpdated.size() > 0) {
        updateTableMetadataStatus(new HashSet<Segment>(segmentFilesToBeUpdated), table, UUID, false, new ArrayList<Segment>(), segmentFilesToBeUpdatedLatest);
    }
    // delete the old update table status files.
    if (null != validUpdateStatusFile && !validUpdateStatusFile.isEmpty()) {
        final String updateStatusTimestamp = validUpdateStatusFile.substring(validUpdateStatusFile.lastIndexOf(CarbonCommonConstants.HYPHEN) + 1);
        String tablePath = table.getAbsoluteTableIdentifier().getTablePath();
        CarbonFile metaFolder = FileFactory.getCarbonFile(CarbonTablePath.getMetadataPath(tablePath), FileFactory.getFileType(CarbonTablePath.getMetadataPath(tablePath)));
        CarbonFile[] invalidUpdateStatusFiles = metaFolder.listFiles(new CarbonFileFilter() {

            @Override
            public boolean accept(CarbonFile file) {
                if (file.getName().startsWith(CarbonCommonConstants.TABLEUPDATESTATUS_FILENAME)) {
                    // we only send invalid ones to delete.
                    if (!file.getName().endsWith(updateStatusTimestamp)) {
                        return true;
                    }
                }
                return false;
            }
        });
        for (CarbonFile invalidFile : invalidUpdateStatusFiles) {
            compareTimestampsAndDelete(invalidFile, forceDelete, true);
        }
    }
}
Also used: CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) Segment(org.apache.carbondata.core.datamap.Segment) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)
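
A short, hedged sketch of invoking the cleanup above, for instance from a post-update maintenance path; table and LOGGER are again assumed to come from the calling class.

// Hypothetical invocation: clean up stale update and delete delta files.
// forceDelete = false respects the max query execution timeout, so files a
// running query may still need are retained for now.
try {
    CarbonUpdateUtil.cleanUpDeltaFiles(table, false);
} catch (IOException e) {
    // cleanup is best-effort; a failed run can simply be retried later
    LOGGER.error("clean up of delta files failed for " + table.getTableName(), e);
}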

Example 3 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonUtilTest method testToGetSegmentString.

@Test
public void testToGetSegmentString() {
    List<Segment> list = new ArrayList<>();
    list.add(new Segment("1", null));
    list.add(new Segment("2", null));
    String segments = CarbonUtil.convertToString(list);
    assertEquals(segments, "1,2");
}
Also used: ArrayList(java.util.ArrayList) Segment(org.apache.carbondata.core.datamap.Segment) Test(org.junit.Test)
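
The reverse conversion can be sketched with Segment.toSegment, the parser the earlier examples use on load names; this assumes the plain comma-separated form produced by CarbonUtil.convertToString and continues from the segments string in the test above.

// Parse the "1,2" string back into Segment objects.
List<Segment> parsed = new ArrayList<>();
for (String segmentNo : segments.split(",")) {
    parsed.add(Segment.toSegment(segmentNo));
}
assertEquals(2, parsed.size());
assertEquals("1", parsed.get(0).getSegmentNo());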

Example 4 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonFileInputFormat method getSplits.

/**
 * {@inheritDoc}
 * The configuration FileInputFormat.INPUT_DIR is used to get the table path to read.
 *
 * @param job the job context carrying the configuration
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    if (null == carbonTable) {
        throw new IOException("Missing/Corrupt schema file for table.");
    }
    if (getValidateSegmentsToAccess(job.getConfiguration())) {
        // get all valid segments and set them into the configuration
        // check for externalTable segment (Segment_null)
        // process and resolve the expression
        Expression filter = getFilterPredicates(job.getConfiguration());
        TableProvider tableProvider = new SingleTableProvider(carbonTable);
        // this will be null in case of corrupt schema file.
        PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
        carbonTable.processFilterExpression(filter, null, null);
        FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter, tableProvider);
        String segmentDir = CarbonTablePath.getSegmentPath(identifier.getTablePath(), "null");
        FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
        if (FileFactory.isFileExist(segmentDir, fileType)) {
            // if external table Segments are found, add it to the List
            List<Segment> externalTableSegments = new ArrayList<Segment>();
            Segment seg = new Segment("null", null);
            externalTableSegments.add(seg);
            Map<String, String> indexFiles = new SegmentIndexFileStore().getIndexFilesFromSegment(segmentDir);
            if (indexFiles.size() == 0) {
                throw new RuntimeException("Index file not present to read the carbondata file");
            }
            // do block filtering and get split
            List<InputSplit> splits = getSplits(job, filterInterface, externalTableSegments, null, partitionInfo, null);
            return splits;
        }
    }
    return null;
}
Also used: SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SingleTableProvider(org.apache.carbondata.core.scan.filter.SingleTableProvider) TableProvider(org.apache.carbondata.core.scan.filter.TableProvider) Segment(org.apache.carbondata.core.datamap.Segment) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
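
A hedged driver sketch for the input format above; the table path is a placeholder, and the generic parameter on CarbonFileInputFormat is an assumption, so treat this as an outline of the call rather than the project's exact reader setup.

// Hypothetical job setup: point the input format at a table path via
// FileInputFormat.INPUT_DIR (the configuration named in the javadoc) and
// ask it for splits.
Configuration conf = new Configuration();
conf.set(FileInputFormat.INPUT_DIR, "/path/to/store/db/table"); // placeholder path
Job job = Job.getInstance(conf);
CarbonFileInputFormat<Object> format = new CarbonFileInputFormat<>();
List<InputSplit> splits = format.getSplits(job);
if (splits != null) {
    for (InputSplit split : splits) {
        // each split is a CarbonInputSplit pointing at carbondata blocks
        System.out.println(split);
    }
}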

Example 5 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

the class CarbonOutputCommitter method overwritePartitions.

/**
 * Overwrites the partitions in case of an overwrite query. It just updates the partition map
 * files of all segment files.
 *
 * @param loadModel load model of the current overwrite load
 * @return the unique id used for this overwrite, or null if there are no partitions to drop
 * @throws IOException
 */
private String overwritePartitions(CarbonLoadModel loadModel, LoadMetadataDetails newMetaEntry) throws IOException {
    CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    SegmentFileStore fileStore = new SegmentFileStore(loadModel.getTablePath(), loadModel.getSegmentId() + "_" + loadModel.getFactTimeStamp() + CarbonTablePath.SEGMENT_EXT);
    List<PartitionSpec> partitionSpecs = fileStore.getPartitionSpecs();
    if (partitionSpecs != null && partitionSpecs.size() > 0) {
        List<Segment> validSegments = new SegmentStatusManager(table.getAbsoluteTableIdentifier()).getValidAndInvalidSegments().getValidSegments();
        String uniqueId = String.valueOf(System.currentTimeMillis());
        List<String> tobeUpdatedSegs = new ArrayList<>();
        List<String> tobeDeletedSegs = new ArrayList<>();
        // First drop the partitions from partition mapper files of each segment
        for (Segment segment : validSegments) {
            new SegmentFileStore(table.getTablePath(), segment.getSegmentFileName()).dropPartitions(segment, partitionSpecs, uniqueId, tobeDeletedSegs, tobeUpdatedSegs);
        }
        newMetaEntry.setUpdateStatusFileName(uniqueId);
        // Commit the removed partitions in carbon store.
        CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false, "", Segment.toSegmentList(tobeDeletedSegs), Segment.toSegmentList(tobeUpdatedSegs));
        return uniqueId;
    }
    return null;
}
Also used: CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ArrayList(java.util.ArrayList) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) Segment(org.apache.carbondata.core.datamap.Segment)
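
A hedged sketch of where this private method sits in the commit flow; since it is private, the caller below would live inside CarbonOutputCommitter itself, and the construction of the metadata entry is simplified with placeholder values (setLoadName and setSegmentStatus are assumed setters on LoadMetadataDetails).

// Hypothetical commit-side caller: build the load metadata entry for the
// overwrite load, then rewrite the partition maps of the existing segments.
LoadMetadataDetails newMetaEntry = new LoadMetadataDetails();
newMetaEntry.setLoadName(loadModel.getSegmentId());
newMetaEntry.setSegmentStatus(SegmentStatus.SUCCESS);
String uniqueId = overwritePartitions(loadModel, newMetaEntry);
// a null id means the segment file listed no partitions, so there was
// nothing to overwrite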

Aggregations

Segment (org.apache.carbondata.core.datamap.Segment): 23
ArrayList (java.util.ArrayList): 10
IOException (java.io.IOException): 8
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 8
SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager): 8
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 6
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 6
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 5
SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager): 5
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 4
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 4
SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore): 3
Expression (org.apache.carbondata.core.scan.expression.Expression): 3
SingleTableProvider (org.apache.carbondata.core.scan.filter.SingleTableProvider): 3
TableProvider (org.apache.carbondata.core.scan.filter.TableProvider): 3
FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf): 3
BitSet (java.util.BitSet): 2
HashSet (java.util.HashSet): 2
FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory): 2
ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock): 2