Search in sources :

Example 16 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

From the class CarbonUpdateUtil, the method cleanStaleDeltaFiles:

/**
 * This will handle the clean up cases if the update fails.
 * Scans every segment directory under the table's partition directory and
 * deletes any update-delta, update-index, or delete-delta file whose name
 * carries the given timestamp.
 *
 * @param table     table whose stale update artifacts should be removed
 * @param timeStamp timestamp (as a string) of the failed update; only files
 *                  whose names end with this timestamp plus a delta/index
 *                  extension are deleted
 */
public static void cleanStaleDeltaFiles(CarbonTable table, final String timeStamp) {
    AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
    String partitionDir = CarbonTablePath.getPartitionDir(identifier.getTablePath());
    CarbonFile file = FileFactory.getCarbonFile(partitionDir, FileFactory.getFileType(partitionDir));
    if (!file.exists()) {
        // nothing has been written for this table yet, so nothing to clean.
        return;
    }
    for (CarbonFile eachDir : file.listFiles()) {
        // for each dir check if the file with the delta timestamp is present or not.
        CarbonFile[] toBeDeleted = eachDir.listFiles(new CarbonFileFilter() {

            @Override
            public boolean accept(CarbonFile file) {
                String fileName = file.getName();
                return fileName.endsWith(timeStamp + CarbonCommonConstants.UPDATE_DELTA_FILE_EXT)
                        || fileName.endsWith(timeStamp + CarbonCommonConstants.UPDATE_INDEX_FILE_EXT)
                        || fileName.endsWith(timeStamp + CarbonCommonConstants.DELETE_DELTA_FILE_EXT);
            }
        });
        // deleting the files of a segment. Failures are logged but do not abort
        // the loop: this is best-effort cleanup after an already-failed update.
        try {
            CarbonUtil.deleteFoldersAndFilesSilent(toBeDeleted);
        } catch (IOException | InterruptedException e) {
            if (e instanceof InterruptedException) {
                // restore the interrupt status so callers up the stack can observe it.
                Thread.currentThread().interrupt();
            }
            LOGGER.error("Exception in deleting the delta files." + e);
        }
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) IOException(java.io.IOException)

Example 17 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

From the class CarbonUpdateUtil, the method cleanUpDeltaFiles:

/**
 * Handling of the clean up of old carbondata files, index files, delete delta,
 * and update status files.
 *
 * @param table clean up will be handled on this table.
 * @param forceDelete if true then max query execution timeout will not be considered.
 * @throws IOException if writing an updated segment file or the table status fails
 */
public static void cleanUpDeltaFiles(CarbonTable table, boolean forceDelete) throws IOException {
    SegmentStatusManager ssm = new SegmentStatusManager(table.getAbsoluteTableIdentifier());
    LoadMetadataDetails[] details = SegmentStatusManager.readLoadMetadata(table.getMetadataPath());
    // the update status file name is derived from the whole details array, so it
    // is loop invariant: compute it once instead of once per segment. The guard
    // keeps the original "" value when there are no segments at all.
    String validUpdateStatusFile = details.length > 0 ? ssm.getUpdateStatusFileName(details) : "";
    boolean isAbortedFile = true;
    boolean isInvalidFile = false;
    List<Segment> segmentFilesToBeUpdated = new ArrayList<>();
    for (LoadMetadataDetails segment : details) {
        if (segment.getSegmentStatus() == SegmentStatus.SUCCESS || segment.getSegmentStatus() == SegmentStatus.LOAD_PARTIAL_SUCCESS) {
            // take the list of files from this segment.
            String segmentPath = CarbonTablePath.getSegmentPath(table.getAbsoluteTableIdentifier().getTablePath(), segment.getLoadName());
            CarbonFile segDir = FileFactory.getCarbonFile(segmentPath, FileFactory.getFileType(segmentPath));
            CarbonFile[] allSegmentFiles = segDir.listFiles();
            // scan through the segment and find the carbondatafiles and index files.
            SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table);
            boolean updateSegmentFile = false;
            // deleting of the aborted file scenario.
            if (deleteStaleCarbonDataFiles(segment, allSegmentFiles, updateStatusManager)) {
                updateSegmentFile = true;
            }
            // get Invalid update  delta files.
            CarbonFile[] invalidUpdateDeltaFiles = updateStatusManager.getUpdateDeltaFilesList(segment.getLoadName(), false, CarbonCommonConstants.UPDATE_DELTA_FILE_EXT, true, allSegmentFiles, isInvalidFile);
            // and then delete.
            for (CarbonFile invalidFile : invalidUpdateDeltaFiles) {
                compareTimestampsAndDelete(invalidFile, forceDelete, false);
            }
            // do the same for the index files; deleting an index file means the
            // segment file must be rewritten afterwards.
            CarbonFile[] invalidIndexFiles = updateStatusManager.getUpdateDeltaFilesList(segment.getLoadName(), false, CarbonCommonConstants.UPDATE_INDEX_FILE_EXT, true, allSegmentFiles, isInvalidFile);
            for (CarbonFile invalidFile : invalidIndexFiles) {
                if (compareTimestampsAndDelete(invalidFile, forceDelete, false)) {
                    updateSegmentFile = true;
                }
            }
            // now handle all the delete delta files which needs to be deleted.
            // there are 2 cases here .
            // 1. if the block is marked as compacted then the corresponding delta files
            // can be deleted if query exec timeout is done.
            // 2. if the block is in success state then also there can be delete
            // delta compaction happened and old files can be deleted.
            SegmentUpdateDetails[] updateDetails = updateStatusManager.readLoadMetadata();
            for (SegmentUpdateDetails block : updateDetails) {
                CarbonFile[] completeListOfDeleteDeltaFiles;
                CarbonFile[] invalidDeleteDeltaFiles;
                if (!block.getSegmentName().equalsIgnoreCase(segment.getLoadName())) {
                    continue;
                }
                // aborted scenario: these files are never valid, so force delete
                // regardless of the caller's forceDelete flag.
                invalidDeleteDeltaFiles = updateStatusManager.getDeleteDeltaInvalidFilesList(block, false, allSegmentFiles, isAbortedFile);
                for (CarbonFile invalidFile : invalidDeleteDeltaFiles) {
                    boolean doForceDelete = true;
                    compareTimestampsAndDelete(invalidFile, doForceDelete, false);
                }
                // case 1
                if (CarbonUpdateUtil.isBlockInvalid(block.getSegmentStatus())) {
                    completeListOfDeleteDeltaFiles = updateStatusManager.getDeleteDeltaInvalidFilesList(block, true, allSegmentFiles, isInvalidFile);
                    for (CarbonFile invalidFile : completeListOfDeleteDeltaFiles) {
                        compareTimestampsAndDelete(invalidFile, forceDelete, false);
                    }
                    CarbonFile[] blockRelatedFiles = updateStatusManager.getAllBlockRelatedFiles(allSegmentFiles, block.getActualBlockName());
                    for (CarbonFile invalidFile : blockRelatedFiles) {
                        if (compareTimestampsAndDelete(invalidFile, forceDelete, false)) {
                            if (invalidFile.getName().endsWith(CarbonCommonConstants.UPDATE_INDEX_FILE_EXT)) {
                                updateSegmentFile = true;
                            }
                        }
                    }
                } else {
                    // case 2
                    invalidDeleteDeltaFiles = updateStatusManager.getDeleteDeltaInvalidFilesList(block, false, allSegmentFiles, isInvalidFile);
                    for (CarbonFile invalidFile : invalidDeleteDeltaFiles) {
                        compareTimestampsAndDelete(invalidFile, forceDelete, false);
                    }
                }
            }
            if (updateSegmentFile) {
                segmentFilesToBeUpdated.add(Segment.toSegment(segment.getLoadName()));
            }
        }
    }
    // rewrite the segment files for every segment whose index files changed,
    // stamping them all with a single timestamp.
    String uuid = String.valueOf(System.currentTimeMillis());
    List<Segment> segmentFilesToBeUpdatedLatest = new ArrayList<>();
    for (Segment segment : segmentFilesToBeUpdated) {
        String file = SegmentFileStore.writeSegmentFile(table.getTablePath(), segment.getSegmentNo(), uuid);
        segmentFilesToBeUpdatedLatest.add(new Segment(segment.getSegmentNo(), file));
    }
    if (segmentFilesToBeUpdated.size() > 0) {
        updateTableMetadataStatus(new HashSet<Segment>(segmentFilesToBeUpdated), table, uuid, false, new ArrayList<Segment>(), segmentFilesToBeUpdatedLatest);
    }
    // delete the update table status files which are old.
    if (null != validUpdateStatusFile && !validUpdateStatusFile.isEmpty()) {
        final String updateStatusTimestamp = validUpdateStatusFile.substring(validUpdateStatusFile.lastIndexOf(CarbonCommonConstants.HYPHEN) + 1);
        String tablePath = table.getAbsoluteTableIdentifier().getTablePath();
        CarbonFile metaFolder = FileFactory.getCarbonFile(CarbonTablePath.getMetadataPath(tablePath), FileFactory.getFileType(CarbonTablePath.getMetadataPath(tablePath)));
        CarbonFile[] invalidUpdateStatusFiles = metaFolder.listFiles(new CarbonFileFilter() {

            @Override
            public boolean accept(CarbonFile file) {
                if (file.getName().startsWith(CarbonCommonConstants.TABLEUPDATESTATUS_FILENAME)) {
                    // we only send invalid ones to delete.
                    if (!file.getName().endsWith(updateStatusTimestamp)) {
                        return true;
                    }
                }
                return false;
            }
        });
        for (CarbonFile invalidFile : invalidUpdateStatusFiles) {
            compareTimestampsAndDelete(invalidFile, forceDelete, true);
        }
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) Segment(org.apache.carbondata.core.datamap.Segment) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)

Example 18 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

From the class StreamSegment, the method updateIndexFile:

/**
 * Update carbonindex file after a stream batch.
 * Writes the index entries to a temporary file first and then force-renames it
 * over the real index file, so a crash mid-write never leaves a truncated index.
 *
 * @param segmentDir segment directory whose stream index file should be rewritten
 * @throws IOException if writing the temporary index or renaming it fails
 */
public static void updateIndexFile(String segmentDir) throws IOException {
    FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
    String filePath = CarbonTablePath.getCarbonStreamIndexFilePath(segmentDir);
    String tempFilePath = filePath + CarbonCommonConstants.TEMPWRITEFILEEXTENSION;
    CarbonIndexFileWriter writer = new CarbonIndexFileWriter();
    // tracks whether the happy path already closed the writer, so the error
    // path does not close it a second time (e.g. when only the rename fails).
    boolean writerClosed = false;
    try {
        writer.openThriftWriter(tempFilePath);
        CarbonFile[] files = listDataFiles(segmentDir, fileType);
        BlockIndex blockIndex;
        for (CarbonFile file : files) {
            blockIndex = new BlockIndex();
            blockIndex.setFile_name(file.getName());
            blockIndex.setFile_size(file.getSize());
            // TODO need to collect these information
            blockIndex.setNum_rows(-1);
            blockIndex.setOffset(-1);
            blockIndex.setBlock_index(new BlockletIndex());
            writer.writeThrift(blockIndex);
        }
        writer.close();
        writerClosed = true;
        CarbonFile tempFile = FileFactory.getCarbonFile(tempFilePath, fileType);
        if (!tempFile.renameForce(filePath)) {
            throw new IOException("temporary file renaming failed, src=" + tempFilePath + ", dest=" + filePath);
        }
    } catch (IOException ex) {
        if (!writerClosed) {
            // best-effort close; the original failure is the one worth rethrowing.
            try {
                writer.close();
            } catch (IOException t) {
                LOGGER.error(t);
            }
        }
        throw ex;
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonIndexFileWriter(org.apache.carbondata.core.writer.CarbonIndexFileWriter) BlockletIndex(org.apache.carbondata.format.BlockletIndex) IOException(java.io.IOException) BlockIndex(org.apache.carbondata.format.BlockIndex) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory)

Example 19 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

From the class StreamSegment, the method size:

/**
 * Calculate the size of the segment by accumulating the data file sizes
 * recorded in the segment's stream index file.
 *
 * @param segmentDir segment directory to measure
 * @return total of the recorded file sizes, or 0 when the segment directory
 *         or its index file does not exist
 * @throws IOException if the index file cannot be read
 */
public static long size(String segmentDir) throws IOException {
    FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
    if (!FileFactory.isFileExist(segmentDir, fileType)) {
        return 0;
    }
    String indexPath = CarbonTablePath.getCarbonStreamIndexFilePath(segmentDir);
    CarbonFile indexFile = FileFactory.getCarbonFile(indexPath, fileType);
    if (!indexFile.exists()) {
        return 0;
    }
    long total = 0;
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        reader.openThriftReader(indexPath);
        // sum the recorded size of every block listed in the index.
        while (reader.hasNext()) {
            total += reader.readBlockIndexInfo().getFile_size();
        }
    } finally {
        reader.closeThriftReader();
    }
    return total;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockIndex(org.apache.carbondata.format.BlockIndex) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory)

Example 20 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

From the class StreamSegment, the method recoverSegmentIfRequired:

/**
 * Check the health of a stream segment and try to recover it from a job fault.
 * This method will be invoked in the following scenarios:
 * 1. at the begin of the streaming (StreamSinkFactory.getStreamSegmentId)
 * 2. after a job failed (CarbonAppendableStreamSink.writeDataFileJob)
 *
 * @param segmentDir segment directory to check and repair
 * @throws IOException if the index cannot be read or a data file cannot be repaired
 */
public static void recoverSegmentIfRequired(String segmentDir) throws IOException {
    FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
    if (!FileFactory.isFileExist(segmentDir, fileType)) {
        return;
    }
    String indexPath = segmentDir + File.separator + CarbonTablePath.getCarbonStreamIndexFileName();
    CarbonFile indexFile = FileFactory.getCarbonFile(indexPath, fileType);
    CarbonFile[] dataFiles = listDataFiles(segmentDir, fileType);
    if (!indexFile.exists()) {
        // no index at all: none of the data files were ever committed, drop them.
        for (CarbonFile dataFile : dataFiles) {
            dataFile.delete();
        }
        return;
    }
    if (dataFiles.length == 0) {
        // index exists but there is no data to reconcile against it.
        return;
    }
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        // build a map of file name -> committed size from the index entries.
        reader.openThriftReader(indexPath);
        Map<String, Long> committedSizes = new HashMap<>();
        while (reader.hasNext()) {
            BlockIndex blockIndex = reader.readBlockIndexInfo();
            committedSizes.put(blockIndex.getFile_name(), blockIndex.getFile_size());
        }
        // reconcile each data file against its committed size: unknown or empty
        // files are removed, over-long files are truncated back to the committed
        // length.
        for (CarbonFile dataFile : dataFiles) {
            Long committed = committedSizes.get(dataFile.getName());
            if (null == committed || committed == 0) {
                dataFile.delete();
            } else if (committed < dataFile.getSize()) {
                FileFactory.truncateFile(dataFile.getCanonicalPath(), fileType, committed);
            }
        }
    } finally {
        reader.closeThriftReader();
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) HashMap(java.util.HashMap) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockIndex(org.apache.carbondata.format.BlockIndex) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory)

Aggregations

CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)91 CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)32 IOException (java.io.IOException)24 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)17 ArrayList (java.util.ArrayList)14 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)11 HashMap (java.util.HashMap)7 Path (org.apache.hadoop.fs.Path)7 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)6 Map (java.util.Map)5 Segment (org.apache.carbondata.core.datamap.Segment)5 FileType (org.apache.carbondata.core.datastore.impl.FileFactory.FileType)5 BlockIndex (org.apache.carbondata.format.BlockIndex)5 HashSet (java.util.HashSet)4 CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader)4 SegmentIndexFileStore (org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore)3 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)3 SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore)3 SegmentUpdateDetails (org.apache.carbondata.core.mutate.SegmentUpdateDetails)3 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)3