Search in sources :

Example 31 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class SegmentUpdateStatusManager method getUpdateDeltaFilesList.

/**
 * Selects, from the given files of a segment, those that end with the requested extension and
 * whose embedded timestamp lies inside or outside the segment's update delta range.
 *
 * <p>The range (update delta start/end timestamp) and the load start time are taken from the
 * load metadata entry whose load name equals {@code segmentId}, ignoring case. If several
 * entries match, the values of the last one are used. A file's timestamp is parsed from the
 * part of its name before the first {@code '.'}, taking everything after the last
 * {@code CarbonCommonConstants.HYPHEN}.
 *
 * @param segmentId load name of the segment to look up
 * @param validUpdateFiles if true, only files with a timestamp inside the range (bounds
 *                         included) are returned; if false, only files outside of it
 * @param fileExtension suffix a file name must end with to be considered
 * @param excludeOriginalFact if true, files whose timestamp equals the load start time of the
 *                            segment are skipped
 * @param allFilesOfSegment candidate files to filter
 * @param isAbortedFile only consulted when {@code validUpdateFiles} is false: if true, only
 *                      files newer than the range end are returned; otherwise files older than
 *                      the range start or newer than the range end
 * @return the selected files; an empty array when the update delta start timestamp is empty
 */
public CarbonFile[] getUpdateDeltaFilesList(String segmentId, final boolean validUpdateFiles, final String fileExtension, final boolean excludeOriginalFact, CarbonFile[] allFilesOfSegment, boolean isAbortedFile) {
    String deltaStart = "";
    String deltaEnd = "";
    long loadStart = 0;
    LoadMetadataDetails[] allLoads = SegmentStatusManager.readLoadMetadata(CarbonTablePath.getMetadataPath(identifier.getTablePath()));
    for (LoadMetadataDetails load : allLoads) {
        if (!load.getLoadName().equalsIgnoreCase(segmentId)) {
            continue;
        }
        // matching segment: remember its delta range and load start time.
        deltaStart = load.getUpdateDeltaStartTimestamp();
        deltaEnd = load.getUpdateDeltaEndTimestamp();
        loadStart = load.getLoadStartTime();
    }
    // an empty start timestamp means no update delta exists for the segment.
    if (deltaStart.isEmpty()) {
        return new CarbonFile[0];
    }
    final Long rangeEnd = CarbonUpdateUtil.getTimeStampAsLong(deltaEnd);
    final Long rangeStart = CarbonUpdateUtil.getTimeStampAsLong(deltaStart);
    List<CarbonFile> selected = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    for (CarbonFile candidate : allFilesOfSegment) {
        String name = candidate.getName();
        if (!name.endsWith(fileExtension)) {
            continue;
        }
        String stem = name.substring(0, name.indexOf('.'));
        long timestamp = Long.parseLong(stem.substring(stem.lastIndexOf(CarbonCommonConstants.HYPHEN) + 1));
        if (excludeOriginalFact && loadStart == timestamp) {
            continue;
        }
        boolean keep;
        if (validUpdateFiles) {
            keep = timestamp <= rangeEnd && timestamp >= rangeStart;
        } else if (isAbortedFile) {
            // aborted case: only files written after the range end.
            keep = timestamp > rangeEnd;
        } else {
            // invalid case: anything outside the range.
            keep = timestamp < rangeStart || timestamp > rangeEnd;
        }
        if (keep) {
            selected.add(candidate);
        }
    }
    return selected.toArray(new CarbonFile[selected.size()]);
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) ArrayList(java.util.ArrayList)

Example 32 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class SegmentUpdateStatusManager method getUpdateDeltaFilesForSegment.

/**
 * Selects, from the given files of a segment, those that end with the requested extension and
 * whose embedded timestamp is either inside the segment's update delta range or older than it.
 *
 * <p>The range and the load start time are taken from the entry in {@code segmentDetails}
 * whose load name equals {@code segmentId}, ignoring case (the last match wins). A file's
 * timestamp is parsed from the part of its name before the first {@code '.'}, taking
 * everything after the last {@code CarbonCommonConstants.HYPHEN}.
 *
 * @param segmentId load name of the segment to look up
 * @param validUpdateFiles if true, only files with a timestamp inside the range (bounds
 *                         included) are returned, and only when no update status detail with
 *                         the same actual block name reports an invalid block status; if
 *                         false, only files older than the range start are returned
 * @param fileExtension suffix a file name must end with to be considered
 * @param excludeOriginalFact if true, files whose timestamp equals the load start time of the
 *                            segment are skipped
 * @param allFilesOfSegment candidate files to filter
 * @return the selected files; an empty array when the update delta start timestamp is empty
 */
public CarbonFile[] getUpdateDeltaFilesForSegment(String segmentId, final boolean validUpdateFiles, final String fileExtension, final boolean excludeOriginalFact, CarbonFile[] allFilesOfSegment) {
    String deltaStart = "";
    String deltaEnd = "";
    long loadStart = 0;
    for (LoadMetadataDetails load : segmentDetails) {
        if (!load.getLoadName().equalsIgnoreCase(segmentId)) {
            continue;
        }
        // matching segment: remember its delta range and load start time.
        deltaStart = load.getUpdateDeltaStartTimestamp();
        deltaEnd = load.getUpdateDeltaEndTimestamp();
        loadStart = load.getLoadStartTime();
    }
    // an empty start timestamp means no update delta exists for the segment.
    if (deltaStart.isEmpty()) {
        return new CarbonFile[0];
    }
    final Long rangeEnd = CarbonUpdateUtil.getTimeStampAsLong(deltaEnd);
    final Long rangeStart = CarbonUpdateUtil.getTimeStampAsLong(deltaStart);
    List<CarbonFile> selected = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    for (CarbonFile candidate : allFilesOfSegment) {
        String name = candidate.getName();
        if (!name.endsWith(fileExtension)) {
            continue;
        }
        String stem = name.substring(0, name.indexOf('.'));
        long timestamp = Long.parseLong(stem.substring(stem.lastIndexOf(CarbonCommonConstants.HYPHEN) + 1));
        if (excludeOriginalFact && loadStart == timestamp) {
            continue;
        }
        if (!validUpdateFiles) {
            // invalid case: only files older than the range start.
            if (timestamp < rangeStart) {
                selected.add(candidate);
            }
            continue;
        }
        if (timestamp > rangeEnd || timestamp < rangeStart) {
            continue;
        }
        // inside the range: drop the file if any status detail marks its block invalid.
        boolean blockIsValid = true;
        for (SegmentUpdateDetails detail : getUpdateStatusDetails()) {
            boolean sameBlock = detail.getActualBlockName().equalsIgnoreCase(candidate.getName());
            if (sameBlock && CarbonUpdateUtil.isBlockInvalid(detail.getSegmentStatus())) {
                blockIsValid = false;
            }
        }
        if (blockIsValid) {
            selected.add(candidate);
        }
    }
    return selected.toArray(new CarbonFile[selected.size()]);
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) ArrayList(java.util.ArrayList)

Example 33 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class SegmentUpdateStatusManager method getDeleteDeltaInvalidFilesList.

/**
 * Lists delete delta files of the given block found among the supplied segment files.
 *
 * <p>Only files ending with {@code CarbonCommonConstants.DELETE_DELTA_FILE_EXT} whose block
 * name (as derived from the file name) equals the block name of {@code block}, ignoring case,
 * are considered. Each file appears at most once in the result.
 *
 * @param block update details of the block whose delete delta files are wanted
 * @param needCompleteList if true, every delete delta file of the block is returned regardless
 *                         of its timestamp; if false, only the invalid ones are returned
 * @param allSegmentFiles candidate files to filter
 * @param isAbortedFile only consulted when {@code needCompleteList} is false: if true, a file
 *                      is invalid when its timestamp is newer than the delta end timestamp of
 *                      the block; otherwise when it is older than the delta start timestamp or
 *                      newer than the delta end timestamp
 * @return the selected delete delta files, possibly empty
 */
public CarbonFile[] getDeleteDeltaInvalidFilesList(final SegmentUpdateDetails block, final boolean needCompleteList, CarbonFile[] allSegmentFiles, boolean isAbortedFile) {
    final long deltaStartTimestamp = getStartTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, block);
    final long deltaEndTimestamp = getEndTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, block);
    List<CarbonFile> files = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    for (CarbonFile eachFile : allSegmentFiles) {
        String fileName = eachFile.getName();
        if (!fileName.endsWith(CarbonCommonConstants.DELETE_DELTA_FILE_EXT)) {
            continue;
        }
        String blkName = CarbonTablePath.DataFileUtil.getBlockNameFromDeleteDeltaFile(fileName);
        if (!block.getBlockName().equalsIgnoreCase(blkName)) {
            continue;
        }
        // complete list of delta files of that block is returned. Move on right away so the
        // same file is not added a second time by the invalid-file check below.
        if (needCompleteList) {
            files.add(eachFile);
            continue;
        }
        // invalid delete delta files only will be returned. The timestamp is parsed only for
        // files of the requested block, so unrelated file names cannot break the scan.
        long timestamp = CarbonUpdateUtil.getTimeStampAsLong(CarbonTablePath.DataFileUtil.getTimeStampFromDeleteDeltaFile(fileName));
        boolean invalid;
        if (isAbortedFile) {
            invalid = timestamp > deltaEndTimestamp;
        } else {
            invalid = timestamp < deltaStartTimestamp || timestamp > deltaEndTimestamp;
        }
        if (invalid) {
            files.add(eachFile);
        }
    }
    return files.toArray(new CarbonFile[files.size()]);
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) ArrayList(java.util.ArrayList)

Example 34 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class CarbonDictionaryWriterImpl method validateDictionaryFileOffsetWithLastSegmentEntryOffset.

/**
 * If the file already exists, reads the last chunk meta entry from the dictionary metadata
 * file and compares its end offset with {@code chunk_start_offset}. If
 * {@code chunk_start_offset} lies beyond that end offset, some invalid data is present and the
 * dictionary file is truncated back to the end offset of the last entry.
 *
 * <p>A failed truncation is only logged; it does not raise an exception from this method.
 *
 * @throws IOException if an I/O error occurs
 */
private void validateDictionaryFileOffsetWithLastSegmentEntryOffset() throws IOException {
    // read last dictionary chunk meta entry from dictionary metadata file
    chunkMetaObjectForLastSegmentEntry = getChunkMetaObjectForLastSegmentEntry();
    // keep the difference as a long: narrowing it to int could wrap a gap of 2^31 bytes or
    // more into a non-positive value and silently skip the truncation.
    long bytesToTruncate = 0;
    if (null != chunkMetaObjectForLastSegmentEntry) {
        bytesToTruncate = chunk_start_offset - chunkMetaObjectForLastSegmentEntry.getEnd_offset();
    }
    if (bytesToTruncate > 0) {
        LOGGER.info("some inconsistency in dictionary file for column " + this.dictionaryColumnUniqueIdentifier.getColumnIdentifier());
        // truncate the dictionary data till chunk meta end offset
        FileFactory.FileType fileType = FileFactory.getFileType(this.dictionaryFilePath);
        CarbonFile carbonFile = FileFactory.getCarbonFile(this.dictionaryFilePath, fileType);
        boolean truncateSuccess = carbonFile.truncate(this.dictionaryFilePath, chunkMetaObjectForLastSegmentEntry.getEnd_offset());
        if (!truncateSuccess) {
            LOGGER.info("Diction file not truncated successfully for column " + this.dictionaryColumnUniqueIdentifier.getColumnIdentifier());
        }
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory)

Example 35 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class CarbonIndexFileMergeWriter method writeMergeIndexFileBasedOnSegmentFolder.

/**
 * Reads the index content of a segment folder into a {@code SegmentIndexFileStore}, hands the
 * resulting index map to {@code writeMergeIndexFile} and afterwards deletes every supplied
 * index file.
 *
 * @param indexFileNamesTobeAdded passed through to {@code writeMergeIndexFile}
 * @param readFileFooterFromCarbonDataFile if true, the store is filled via
 *        {@code readAllIndexAndFillBolckletInfo}; otherwise via {@code readAllIIndexOfSegment}
 * @param segmentPath path of the segment folder to read from and write to
 * @param indexFiles index files to delete once the merge index file has been written
 * @return always {@code null}
 * @throws IOException if reading the index files or writing the merge index file fails
 */
private String writeMergeIndexFileBasedOnSegmentFolder(List<String> indexFileNamesTobeAdded, boolean readFileFooterFromCarbonDataFile, String segmentPath, CarbonFile[] indexFiles) throws IOException {
    SegmentIndexFileStore indexStore = new SegmentIndexFileStore();
    if (!readFileFooterFromCarbonDataFile) {
        indexStore.readAllIIndexOfSegment(segmentPath);
    } else {
        // upgrade case: an old store has no blocklet info in its index files, so the
        // blocklet info has to be read from the file footer of the carbondata file.
        indexStore.readAllIndexAndFillBolckletInfo(segmentPath);
    }
    Map<String, byte[]> mergedIndexContent = indexStore.getCarbonIndexMap();
    writeMergeIndexFile(indexFileNamesTobeAdded, segmentPath, mergedIndexContent);
    // the individual index files are removed once the merge index file has been written.
    for (int i = 0; i < indexFiles.length; i++) {
        indexFiles[i].delete();
    }
    return null;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore)

Aggregations

CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)91 CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)32 IOException (java.io.IOException)24 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)17 ArrayList (java.util.ArrayList)14 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)11 HashMap (java.util.HashMap)7 Path (org.apache.hadoop.fs.Path)7 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)6 Map (java.util.Map)5 Segment (org.apache.carbondata.core.datamap.Segment)5 FileType (org.apache.carbondata.core.datastore.impl.FileFactory.FileType)5 BlockIndex (org.apache.carbondata.format.BlockIndex)5 HashSet (java.util.HashSet)4 CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader)4 SegmentIndexFileStore (org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore)3 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)3 SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore)3 SegmentUpdateDetails (org.apache.carbondata.core.mutate.SegmentUpdateDetails)3 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)3