Search in sources :

Example 1 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class CarbonUpdateUtil method updateTableMetadataStatus.

/**
 * Updates the load metadata entries of the table status file while holding the table
 * status lock, and writes them back only when something changed or when the update
 * status file link has to be refreshed.
 *
 * @param updatedSegmentsList segments whose load entries are matched by segment number;
 *                            matched entries get their update delta timestamps set (when
 *                            {@code isTimestampUpdateRequired}) and, if listed in
 *                            {@code segmentFilesTobeUpdated}, a new segment file name.
 * @param table table whose status file is updated.
 * @param updatedTimeStamp timestamp written into the entries; it is parsed with
 *                         {@link Long#parseLong(String)} when a segment is marked for delete.
 * @param isTimestampUpdateRequired when true, update delta timestamps are written and the
 *                                  segments in {@code segmentsToBeDeleted} are marked for delete.
 * @param isUpdateStatusFileUpdateRequired when true, the entry named "0" gets the update
 *                                         status file name derived from
 *                                         {@code updatedTimeStamp} and the status file is
 *                                         always rewritten.
 * @param segmentsToBeDeleted segments to be set to {@code MARKED_FOR_DELETE}; only consulted
 *                            when {@code isTimestampUpdateRequired} is true.
 * @param segmentFilesTobeUpdated segments whose segment file name has to be replaced.
 * @param uuid value used to build the path of the table status file that is written.
 * @return true when the lock was acquired and the (optional) write succeeded; false when the
 *         lock could not be acquired or writing the status file failed.
 */
public static boolean updateTableMetadataStatus(Set<Segment> updatedSegmentsList, CarbonTable table, String updatedTimeStamp, boolean isTimestampUpdateRequired, boolean isUpdateStatusFileUpdateRequired, List<Segment> segmentsToBeDeleted, List<Segment> segmentFilesTobeUpdated, String uuid) {
    boolean status = false;
    String metaDataFilepath = table.getMetadataPath();
    AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
    String tableStatusPath = CarbonTablePath.getTableStatusFilePathWithUUID(identifier.getTablePath(), uuid);
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
    ICarbonLock carbonLock = segmentStatusManager.getTableStatusLock();
    boolean lockStatus = false;
    try {
        lockStatus = carbonLock.lockWithRetries();
        if (lockStatus) {
            LOGGER.info("Acquired lock for table" + table.getDatabaseName() + "." + table.getTableName() + " for table status update");
            LoadMetadataDetails[] listOfLoadFolderDetailsArray = SegmentStatusManager.readLoadMetadata(metaDataFilepath);
            // to update table status only when required.
            boolean isUpdateRequired = false;
            for (LoadMetadataDetails loadMetadata : listOfLoadFolderDetailsArray) {
                // the link between the two status files is stored in segment 0 only.
                if (isUpdateStatusFileUpdateRequired && loadMetadata.getLoadName().equalsIgnoreCase("0")) {
                    loadMetadata.setUpdateStatusFileName(CarbonUpdateUtil.getUpdateStatusFileName(updatedTimeStamp));
                }
                if (isTimestampUpdateRequired) {
                    // if the segment is in the list of marked for delete then update the status.
                    if (segmentsToBeDeleted.contains(new Segment(loadMetadata.getLoadName()))) {
                        loadMetadata.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE);
                        loadMetadata.setModificationOrDeletionTimestamp(Long.parseLong(updatedTimeStamp));
                        isUpdateRequired = true;
                    }
                }
                for (Segment segName : updatedSegmentsList) {
                    if (loadMetadata.getLoadName().equalsIgnoreCase(segName.getSegmentNo())) {
                        if (isTimestampUpdateRequired) {
                            // update flow: the start timestamp is set only the first time
                            // the segment gets updated.
                            if (loadMetadata.getUpdateDeltaStartTimestamp().isEmpty()) {
                                loadMetadata.setUpdateDeltaStartTimestamp(updatedTimeStamp);
                            }
                            // the end timestamp is refreshed on every update.
                            loadMetadata.setUpdateDeltaEndTimestamp(updatedTimeStamp);
                            isUpdateRequired = true;
                        }
                        if (segmentFilesTobeUpdated.contains(Segment.toSegment(loadMetadata.getLoadName(), null))) {
                            loadMetadata.setSegmentFile(loadMetadata.getLoadName() + "_" + updatedTimeStamp + CarbonTablePath.SEGMENT_EXT);
                            isUpdateRequired = true;
                        }
                    }
                }
            }
            try {
                if (isUpdateRequired || isUpdateStatusFileUpdateRequired) {
                    SegmentStatusManager.writeLoadDetailsIntoFile(tableStatusPath, listOfLoadFolderDetailsArray);
                }
            } catch (IOException e) {
                // Do not fail silently: the caller only sees "false", so record the cause here.
                LOGGER.error("Failed to write table status file " + tableStatusPath + " for table " + table.getDatabaseName() + "." + table.getTableName(), e);
                return false;
            }
            status = true;
        } else {
            LOGGER.error("Not able to acquire the lock for Table status update for table " + table.getDatabaseName() + "." + table.getTableName());
        }
    } finally {
        // release the lock only if this call actually acquired it.
        if (lockStatus) {
            if (carbonLock.unlock()) {
                LOGGER.info("Table unlocked successfully after table status update" + table.getDatabaseName() + "." + table.getTableName());
            } else {
                LOGGER.error("Unable to unlock Table lock for table" + table.getDatabaseName() + "." + table.getTableName() + " during table status update");
            }
        }
    }
    return status;
}
Also used : ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) Segment(org.apache.carbondata.core.index.Segment)

Example 2 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class CarbonUpdateUtil method cleanUpDeltaFiles.

/**
 * Collects the delete delta files and update status files of a table that are no longer
 * referenced, sums up their sizes and, unless a dry run is requested, deletes them.
 *
 * @param table    table on which the clean up is performed.
 * @param isDryRun when true no file is deleted; only the total size is computed.
 * @return accumulated size of all files selected for deletion.
 * @throws IOException declared for the file system operations used while collecting files.
 */
public static long cleanUpDeltaFiles(CarbonTable table, boolean isDryRun) throws IOException {
    SegmentStatusManager statusManager = new SegmentStatusManager(table.getAbsoluteTableIdentifier());
    LoadMetadataDetails[] loadDetails = SegmentStatusManager.readLoadMetadata(table.getMetadataPath());
    long reclaimableSize = 0;
    List<CarbonFile> staleFiles = new ArrayList<>();
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table);
    // names of all segments that have update details; only these (or segments carrying an
    // update delta start timestamp) need their delta files inspected.
    Set<String> segmentsWithUpdates = new HashSet<>();
    for (SegmentUpdateDetails detail : updateStatusManager.getUpdateStatusDetails()) {
        segmentsWithUpdates.add(detail.getSegmentName());
    }
    boolean isInvalidFile = false;
    // the currently valid update status file name is resolved from the load details.
    String currentUpdateStatusFile = statusManager.getUpdateStatusFileName(loadDetails);

    if (table.isHivePartitionTable()) {
        // partition directories are recognised by "=" in their name.
        List<CarbonFile> partitionDirs = new ArrayList<>();
        for (CarbonFile child : FileFactory.getCarbonFile(table.getTablePath()).listFiles()) {
            if (child.getName().contains("=")) {
                partitionDirs.add(child);
            }
        }
        // start with every delete delta file found below the partition directories ...
        List<CarbonFile> candidateDeltaFiles = new ArrayList<>();
        for (CarbonFile partitionDir : partitionDirs) {
            for (CarbonFile nested : partitionDir.listFiles(true)) {
                if (nested.getName().endsWith(CarbonCommonConstants.DELETE_DELTA_FILE_EXT)) {
                    candidateDeltaFiles.add(nested);
                }
            }
        }
        // ... then drop the ones still referenced by the update details.
        // Case 1: start timestamp equals end timestamp - the single delta file is identified
        //         by the end timestamp.
        // Case 2: timestamps differ - the delta file stamps list names the files in use.
        for (SegmentUpdateDetails block : updateStatusManager.readLoadMetadata()) {
            if (block.getDeleteDeltaStartTimestamp().equals(block.getDeleteDeltaEndTimestamp())) {
                candidateDeltaFiles.removeIf(candidate -> candidate.getName().endsWith(block.getDeleteDeltaEndTimestamp() + CarbonCommonConstants.DELETE_DELTA_FILE_EXT));
            } else {
                block.getDeltaFileStamps().forEach(stamp -> candidateDeltaFiles.removeIf(candidate -> candidate.getName().endsWith(stamp + CarbonCommonConstants.DELETE_DELTA_FILE_EXT)));
            }
        }
        for (CarbonFile leftover : candidateDeltaFiles) {
            reclaimableSize += leftover.getSize();
            staleFiles.add(leftover);
        }
    } else {
        for (LoadMetadataDetails segment : loadDetails) {
            // segments that are marked for delete or compacted get removed anyway.
            boolean isLoaded = segment.getSegmentStatus() == SegmentStatus.SUCCESS || segment.getSegmentStatus() == SegmentStatus.LOAD_PARTIAL_SUCCESS;
            if (!isLoaded) {
                continue;
            }
            // skip segments that were never updated.
            if (segment.getUpdateDeltaStartTimestamp().isEmpty() && !segmentsWithUpdates.contains(segment.getLoadName())) {
                continue;
            }
            String segmentPath = CarbonTablePath.getSegmentPath(table.getAbsoluteTableIdentifier().getTablePath(), segment.getLoadName());
            CarbonFile[] segmentFiles = FileFactory.getCarbonFile(segmentPath).listFiles();
            for (SegmentUpdateDetails block : updateStatusManager.readLoadMetadata()) {
                // only blocks belonging to the current segment are relevant.
                if (!block.getSegmentName().equalsIgnoreCase(segment.getLoadName())) {
                    continue;
                }
                CarbonFile[] invalidDeltas = updateStatusManager.getDeleteDeltaInvalidFilesList(block, false, segmentFiles, isInvalidFile);
                for (CarbonFile invalidDelta : invalidDeltas) {
                    reclaimableSize += invalidDelta.getSize();
                    staleFiles.add(invalidDelta);
                }
            }
        }
    }

    // every update status file other than the current one is outdated.
    if (currentUpdateStatusFile != null && !currentUpdateStatusFile.isEmpty()) {
        final String currentTimestamp = currentUpdateStatusFile.substring(currentUpdateStatusFile.lastIndexOf(CarbonCommonConstants.HYPHEN) + 1);
        String tablePath = table.getAbsoluteTableIdentifier().getTablePath();
        CarbonFile metadataDir = FileFactory.getCarbonFile(CarbonTablePath.getMetadataPath(tablePath));
        CarbonFile[] outdatedStatusFiles = metadataDir.listFiles(new CarbonFileFilter() {

            @Override
            public boolean accept(CarbonFile file) {
                // accept update status files whose name does not end with the current timestamp.
                return file.getName().startsWith(CarbonCommonConstants.TABLEUPDATESTATUS_FILENAME) && !file.getName().endsWith(currentTimestamp);
            }
        });
        for (CarbonFile outdated : outdatedStatusFiles) {
            reclaimableSize += outdated.getSize();
            staleFiles.add(outdated);
        }
    }

    if (isDryRun) {
        return reclaimableSize;
    }
    for (CarbonFile stale : staleFiles) {
        stale.deleteFile();
    }
    return reclaimableSize;
}
Also used : CarbonUtil(org.apache.carbondata.core.util.CarbonUtil) Arrays(java.util.Arrays) Segment(org.apache.carbondata.core.index.Segment) HashMap(java.util.HashMap) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) SegmentStatus(org.apache.carbondata.core.statusmanager.SegmentStatus) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore) Logger(org.apache.log4j.Logger) CollectionUtils(org.apache.commons.collections.CollectionUtils) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) CarbonCommonConstantsInternal(org.apache.carbondata.core.constants.CarbonCommonConstantsInternal) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) Set(java.util.Set) ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) RowCountDetailsVO(org.apache.carbondata.core.mutate.data.RowCountDetailsVO) BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) List(java.util.List) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) CarbonProperties(org.apache.carbondata.core.util.CarbonProperties) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) 
SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) HashSet(java.util.HashSet)

Example 3 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class TableStatusReadCommittedScope method getCommittedIndexFile.

/**
 * Returns the index files committed for the given segment.
 *
 * <p>When the segment has a non-empty segment file name and the corresponding segment file
 * can be loaded, the index and merge files are taken from that segment file and the
 * segment's metadata info is set from it. Otherwise the index files are read from the
 * segment directory and the files reported as invalid or already merged are filtered out.
 *
 * @param segment segment whose committed index files are requested.
 * @return map of index files for the segment.
 * @throws IOException if reading the segment file or the index files fails.
 */
@Override
public Map<String, String> getCommittedIndexFile(Segment segment) throws IOException {
    SegmentFileStore fileStore = null;
    String segmentFileName = segment.getSegmentFileName();
    if (segmentFileName != null && !segmentFileName.isEmpty()) {
        fileStore = new SegmentFileStore(identifier.getTablePath(), segmentFileName);
    }
    // fileStore is null for a null AND for an empty segment file name; checking the store
    // itself avoids a NullPointerException in the empty-name case.
    if (fileStore == null || fileStore.getSegmentFile() == null) {
        String path = CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo());
        Map<String, String> indexFiles = new SegmentIndexFileStore().getMergeOrIndexFilesFromSegment(path);
        Set<String> mergedIndexFiles = SegmentFileStore.getInvalidAndMergedIndexFiles(new ArrayList<>(indexFiles.keySet()));
        Map<String, String> filteredIndexFiles = indexFiles;
        if (mergedIndexFiles.size() > 0) {
            // do not include already merged index files details.
            filteredIndexFiles = indexFiles.entrySet().stream().filter(indexFile -> !mergedIndexFiles.contains(indexFile.getKey())).collect(HashMap::new, (m, v) -> m.put(v.getKey(), v.getValue()), HashMap::putAll);
        }
        return filteredIndexFiles;
    }
    // the segment file is known to be present here, so its metadata can be used directly.
    Map<String, String> indexFiles = fileStore.getIndexAndMergeFiles();
    segment.setSegmentMetaDataInfo(fileStore.getSegmentFile().getSegmentMetaDataInfo());
    return indexFiles;
}
Also used : InterfaceAudience(org.apache.carbondata.common.annotations.InterfaceAudience) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) InterfaceStability(org.apache.carbondata.common.annotations.InterfaceStability) Segment(org.apache.carbondata.core.index.Segment) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) Set(java.util.Set) IOException(java.io.IOException) HashMap(java.util.HashMap) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) SegmentRefreshInfo(org.apache.carbondata.core.statusmanager.SegmentRefreshInfo) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore)

Example 4 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class CarbonUtil method validateRangeOfSegmentList.

/**
 * Validates a comma separated list of segment ids. Every entry must either be the
 * wildcard {@code *} or have a segment number that parses as a float in the range
 * {@code [0, Float.MAX_VALUE]}.
 *
 * @param segmentId comma separated segment ids to validate.
 * @return true when every entry is valid; invalid input is reported by exception.
 * @throws InvalidConfigurationException if the list has no entries, an entry is not a
 *         number, or an entry is outside the allowed range.
 */
public static boolean validateRangeOfSegmentList(String segmentId) throws InvalidConfigurationException {
    String[] values = segmentId.split(",");
    try {
        if (values.length == 0) {
            throw new InvalidConfigurationException("carbon.input.segments.<database_name>.<table_name> value can't be empty.");
        }
        for (String value : values) {
            if (!value.equalsIgnoreCase("*")) {
                Segment segment = Segment.toSegment(value, null);
                float aFloatValue = Float.parseFloat(segment.getSegmentNo());
                // Float.parseFloat accepts "NaN"; NaN fails both range comparisons below and
                // would otherwise slip through as a valid segment id.
                if (Float.isNaN(aFloatValue) || aFloatValue < 0 || aFloatValue > Float.MAX_VALUE) {
                    throw new InvalidConfigurationException("carbon.input.segments.<database_name>.<table_name> value range should be greater " + "than 0 and less than " + Float.MAX_VALUE);
                }
            }
        }
    } catch (NumberFormatException nfe) {
        throw new InvalidConfigurationException("carbon.input.segments.<database_name>.<table_name> value range is not valid");
    }
    return true;
}
Also used : Segment(org.apache.carbondata.core.index.Segment) InvalidConfigurationException(org.apache.carbondata.core.exception.InvalidConfigurationException)

Example 5 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class CarbonUtil method getFormatVersion.

/**
 * Determines the carbon file format version of a table by reading the header of one of
 * its index files.
 *
 * <p>For a flat folder table the index files are looked up in the table path; otherwise
 * the valid segments are scanned until one yields a version. Whenever no version can be
 * read from an index file, the format version configured in {@link CarbonProperties} is
 * returned.
 *
 * @param carbonTable carbon table whose format version is requested.
 * @return version read from an index file header, or the configured default version.
 * @throws IOException if reading the segment status or an index file fails.
 */
public static ColumnarFormatVersion getFormatVersion(CarbonTable carbonTable) throws IOException {
    String segmentPath = null;
    boolean supportFlatFolder = carbonTable.isSupportFlatFolder();
    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
    ColumnarFormatVersion version = null;
    SegmentIndexFileStore fileStore = new SegmentIndexFileStore();
    CarbonProperties carbonProperties = CarbonProperties.getInstance();
    // if the carbon table supports flat folder, the index files live in the table path
    if (supportFlatFolder) {
        segmentPath = carbonTable.getTablePath();
        if (FileFactory.isFileExist(segmentPath)) {
            fileStore.readAllIIndexOfSegment(segmentPath);
            Map<String, byte[]> carbonIndexMap = fileStore.getCarbonIndexMap();
            // only the first index file is read; its header carries the version
            for (byte[] fileData : carbonIndexMap.values()) {
                try {
                    indexReader.openThriftReader(fileData);
                    IndexHeader indexHeader = indexReader.readIndexHeader();
                    version = ColumnarFormatVersion.valueOf((short) indexHeader.getVersion());
                    break;
                } finally {
                    indexReader.closeThriftReader();
                }
            }
        }
    } else {
        // get the valid segments
        SegmentStatusManager segmentStatusManager = new SegmentStatusManager(carbonTable.getAbsoluteTableIdentifier());
        SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegmentsInfo = segmentStatusManager.getValidAndInvalidSegments(carbonTable.isMV());
        List<Segment> validSegments = validAndInvalidSegmentsInfo.getValidSegments();
        if (validSegments.isEmpty()) {
            return carbonProperties.getFormatVersion();
        }
        // get the carbon index file header from a valid segment
        for (Segment segment : validSegments) {
            segmentPath = carbonTable.getSegmentPath(segment.getSegmentNo());
            if (FileFactory.isFileExist(segmentPath)) {
                fileStore.readAllIIndexOfSegment(segmentPath);
                Map<String, byte[]> carbonIndexMap = fileStore.getCarbonIndexMap();
                if (carbonIndexMap.size() == 0) {
                    LOGGER.warn("the valid segment path: " + segmentPath + " does not exist in the system of table: " + carbonTable.getTableUniqueName());
                    continue;
                }
                // only the first index file is read; its header carries the version
                for (byte[] fileData : carbonIndexMap.values()) {
                    try {
                        indexReader.openThriftReader(fileData);
                        IndexHeader indexHeader = indexReader.readIndexHeader();
                        version = ColumnarFormatVersion.valueOf((short) indexHeader.getVersion());
                        break;
                    } finally {
                        indexReader.closeThriftReader();
                    }
                }
                // if the carbon file version was read from a valid segment, then end
                if (version != null) {
                    break;
                }
            }
        }
    }
    // Shared fallback for both branches: a missing table path, an empty index map or
    // segments without index files all resolve to the configured default instead of null.
    if (version == null) {
        version = carbonProperties.getFormatVersion();
    }
    return version;
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) Segment(org.apache.carbondata.core.index.Segment) ColumnarFormatVersion(org.apache.carbondata.core.metadata.ColumnarFormatVersion)

Aggregations

Segment (org.apache.carbondata.core.index.Segment)35 ArrayList (java.util.ArrayList)24 IOException (java.io.IOException)18 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)14 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)11 HashMap (java.util.HashMap)10 List (java.util.List)9 Map (java.util.Map)8 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)8 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)8 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)8 CarbonCommonConstants (org.apache.carbondata.core.constants.CarbonCommonConstants)7 HashSet (java.util.HashSet)6 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)6 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)6 TableIndex (org.apache.carbondata.core.index.TableIndex)6 Collectors (java.util.stream.Collectors)5 LogServiceFactory (org.apache.carbondata.common.logging.LogServiceFactory)5 IndexFilter (org.apache.carbondata.core.index.IndexFilter)5 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)5