Example 21 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class StreamSegment method recoverFileIfRequired.

/**
 * Check the health of a stream data file and try to recover it from a task fault.
 * This method will be invoked in the following scenario:
 *  1. at the beginning of a data-file writing task
 */
public static void recoverFileIfRequired(String segmentDir, String fileName, String indexName) throws IOException {
    FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
    String filePath = segmentDir + File.separator + fileName;
    CarbonFile file = FileFactory.getCarbonFile(filePath, fileType);
    String indexPath = segmentDir + File.separator + indexName;
    CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
    if (file.exists() && index.exists()) {
        CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
        try {
            indexReader.openThriftReader(indexPath);
            while (indexReader.hasNext()) {
                BlockIndex blockIndex = indexReader.readBlockIndexInfo();
                if (blockIndex.getFile_name().equals(fileName)) {
                    if (blockIndex.getFile_size() == 0) {
                        file.delete();
                    } else if (blockIndex.getFile_size() < file.getSize()) {
                        FileFactory.truncateFile(filePath, fileType, blockIndex.getFile_size());
                    }
                }
            }
        } finally {
            indexReader.closeThriftReader();
        }
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockIndex(org.apache.carbondata.format.BlockIndex) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory)
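
For orientation, here is a minimal caller sketch. The segment directory and file names below are hypothetical placeholders (real paths come from CarbonTablePath), and the call assumes the StreamSegment class is on the classpath; in CarbonData this recovery runs before a streaming write task appends to the data file.

import java.io.IOException;

public class StreamRecoveryExample {

    public static void main(String[] args) throws IOException {
        // Hypothetical segment layout, for illustration only.
        String segmentDir = "/tmp/store/default/t1/Fact/Part0/Segment_0";
        String dataFile = "part-0-0_batchno0-0-0.carbondata";
        String indexFile = "0_batchno0-0-0.carbonindex";
        // Truncates the data file back to the size recorded in the index,
        // or deletes it when the index records a zero size.
        StreamSegment.recoverFileIfRequired(segmentDir, dataFile, indexFile);
    }
}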

Example 22 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class ManageDictionaryAndBTree method deleteDictionaryFileAndCache.

/**
 * This method will delete the dictionary and sort index files for the given
 * column ID and clear the corresponding dictionary cache entries.
 *
 * @param columnSchema schema of the column whose files are to be deleted
 * @param identifier   absolute identifier of the table
 */
public static void deleteDictionaryFileAndCache(final ColumnSchema columnSchema, AbsoluteTableIdentifier identifier) {
    String metadataDirectoryPath = CarbonTablePath.getMetadataPath(identifier.getTablePath());
    CarbonFile metadataDir = FileFactory.getCarbonFile(metadataDirectoryPath, FileFactory.getFileType(metadataDirectoryPath));
    if (metadataDir.exists()) {
        // the sort index file name has the dictionary size appended to it, so every
        // file whose name starts with this column ID must be listed
        CarbonFile[] listFiles = metadataDir.listFiles(new CarbonFileFilter() {

            @Override
            public boolean accept(CarbonFile path) {
                return path.getName().startsWith(columnSchema.getColumnUniqueId());
            }
        });
        for (CarbonFile file : listFiles) {
            // delete every matching file; a failure is logged but does not abort the loop
            try {
                FileFactory.deleteFile(file.getCanonicalPath(), FileFactory.getFileType(file.getCanonicalPath()));
            } catch (IOException e) {
                LOGGER.error("Failed to delete dictionary or sortIndex file for column " + columnSchema.getColumnName() + "with column ID " + columnSchema.getColumnUniqueId());
            }
        }
    }
    // remove dictionary cache
    removeDictionaryColumnFromCache(identifier, columnSchema.getColumnUniqueId());
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) IOException(java.io.IOException)
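
A hedged sketch of calling this from a drop-column flow follows. The AbsoluteTableIdentifier.from overload, the ManageDictionaryAndBTree import location, and the database/table names are assumptions for illustration; verify them against your CarbonData checkout.

import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;

public class DropColumnCleanupExample {

    // Hypothetical helper: remove dictionary artifacts after a DROP COLUMN.
    static void cleanupDroppedColumn(String tablePath, ColumnSchema dropped) {
        // "default" and "t1" are placeholder database and table names.
        AbsoluteTableIdentifier identifier =
                AbsoluteTableIdentifier.from(tablePath, "default", "t1");
        // Deletes Metadata/<columnUniqueId>* files and evicts the column's
        // dictionary cache entries.
        ManageDictionaryAndBTree.deleteDictionaryFileAndCache(dropped, identifier);
    }
}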

Example 23 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class CarbonTableInputFormat method getSplitsOfStreaming.

/**
 * Uses the file list in the .carbonindex file to compute the input splits of a streaming table.
 */
public List<InputSplit> getSplitsOfStreaming(JobContext job, AbsoluteTableIdentifier identifier, List<Segment> streamSegments) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (streamSegments != null && !streamSegments.isEmpty()) {
        numStreamSegments = streamSegments.size();
        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
        long maxSize = getMaxSplitSize(job);
        for (Segment segment : streamSegments) {
            String segmentDir = CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo());
            FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
            if (FileFactory.isFileExist(segmentDir, fileType)) {
                String indexName = CarbonTablePath.getCarbonStreamIndexFileName();
                String indexPath = segmentDir + File.separator + indexName;
                CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
                // only read splits when the index file exists
                if (index.exists()) {
                    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
                    try {
                        // iterate over the block entries recorded in the index file
                        indexReader.openThriftReader(indexPath);
                        while (indexReader.hasNext()) {
                            BlockIndex blockIndex = indexReader.readBlockIndexInfo();
                            String filePath = segmentDir + File.separator + blockIndex.getFile_name();
                            Path path = new Path(filePath);
                            long length = blockIndex.getFile_size();
                            if (length != 0) {
                                BlockLocation[] blkLocations;
                                FileSystem fs = FileFactory.getFileSystem(path);
                                FileStatus file = fs.getFileStatus(path);
                                blkLocations = fs.getFileBlockLocations(path, 0, length);
                                long blockSize = file.getBlockSize();
                                long splitSize = computeSplitSize(blockSize, minSize, maxSize);
                                long bytesRemaining = length;
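                                // Hadoop-style split slop: emit full-size splits while more
                                // than 1.1x splitSize remains, so the final split is never a
                                // tiny tail (it may be up to 10% larger than splitSize).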
                                while (((double) bytesRemaining) / splitSize > 1.1) {
                                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                                    splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
                                    bytesRemaining -= splitSize;
                                }
                                if (bytesRemaining != 0) {
                                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                                    splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining, bytesRemaining, blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
                                }
                            } else {
                                // Create empty hosts array for zero length files
                                splits.add(makeSplit(segment.getSegmentNo(), path, 0, length, new String[0], FileFormat.ROW_V1));
                            }
                        }
                    } finally {
                        indexReader.closeThriftReader();
                    }
                }
            }
        }
    }
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) FileStatus(org.apache.hadoop.fs.FileStatus) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) ArrayList(java.util.ArrayList) BlockLocation(org.apache.hadoop.fs.BlockLocation) BlockIndex(org.apache.carbondata.format.BlockIndex) Segment(org.apache.carbondata.core.datamap.Segment) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) FileSystem(org.apache.hadoop.fs.FileSystem) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit)
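
The computeSplitSize call above is the standard Hadoop FileInputFormat clamp; a sketch of what it evaluates, mirroring Hadoop's implementation and shown here only for reference:

// blockSize bounded below by minSize and above by maxSize:
// splitSize = max(minSize, min(maxSize, blockSize))
static long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}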

Example 24 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class MinMaxIndexDataMap method getCarbonMinMaxIndexFiles.

private CarbonFile[] getCarbonMinMaxIndexFiles(String filePath, String segmentId) {
    String path = filePath.substring(0, filePath.lastIndexOf("/") + 1);
    CarbonFile carbonFile = FileFactory.getCarbonFile(path);
    return carbonFile.listFiles(new CarbonFileFilter() {

        @Override
        public boolean accept(CarbonFile file) {
            return file.getName().endsWith(".minmaxindex");
        }
    });
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)
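
Since CarbonFileFilter has a single abstract method, the anonymous class can be written as a lambda when the build targets Java 8 or later; an equivalent sketch:

private CarbonFile[] getCarbonMinMaxIndexFiles(String filePath, String segmentId) {
    String path = filePath.substring(0, filePath.lastIndexOf("/") + 1);
    // Lambda form of the CarbonFileFilter above; behavior is identical.
    return FileFactory.getCarbonFile(path)
            .listFiles(file -> file.getName().endsWith(".minmaxindex"));
}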

Example 25 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class FileFactory method truncateFile.

/**
 * Truncates the file at the given path to the new size.
 *
 * @param path     path of the file to truncate
 * @param fileType type of the underlying file system
 * @param newSize  target size in bytes
 * @throws IOException if the truncation fails
 */
public static void truncateFile(String path, FileType fileType, long newSize) throws IOException {
    path = path.replace("\\", "/");
    FileChannel fileChannel = null;
    switch(fileType) {
        case LOCAL:
            path = getUpdatedFilePath(path, fileType);
            fileChannel = new FileOutputStream(path, true).getChannel();
            try {
                fileChannel.truncate(newSize);
            } finally {
                if (fileChannel != null) {
                    fileChannel.close();
                }
            }
            return;
        case HDFS:
        case ALLUXIO:
        case VIEWFS:
        case S3:
            // FileSystem.truncate was only added in Hadoop 2.7, so it is looked up
            // reflectively; older versions fall back to CarbonFile.truncate below.
            try {
                Path pt = new Path(path);
                FileSystem fs = pt.getFileSystem(configuration);
                Method truncateMethod = fs.getClass().getDeclaredMethod("truncate", new Class[] { Path.class, long.class });
                truncateMethod.invoke(fs, new Object[] { pt, newSize });
            } catch (NoSuchMethodException e) {
                LOGGER.error("the version of hadoop is below 2.7, there is no 'truncate'" + " method in FileSystem, It needs to use 'CarbonFile.truncate'.");
                CarbonFile carbonFile = FileFactory.getCarbonFile(path, fileType);
                carbonFile.truncate(path, newSize);
            } catch (Exception e) {
                LOGGER.error("Other exception occurred while truncating the file " + e.getMessage());
            }
            return;
        default:
            fileChannel = new FileOutputStream(path, true).getChannel();
            try {
                fileChannel.truncate(newSize);
            } finally {
                if (fileChannel != null) {
                    fileChannel.close();
                }
            }
            return;
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) FileChannel(java.nio.channels.FileChannel) FileOutputStream(java.io.FileOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem) Method(java.lang.reflect.Method) IOException(java.io.IOException)
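
The LOCAL and default branches can also be written with try-with-resources, which closes the channel even if truncate throws; a minimal sketch with identical behavior:

import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.channels.FileChannel;

public class LocalTruncateExample {

    static void truncateLocal(String path, long newSize) throws IOException {
        // Append mode preserves the existing bytes until the channel
        // truncates the file down to newSize.
        try (FileChannel channel = new FileOutputStream(path, true).getChannel()) {
            channel.truncate(newSize);
        }
    }
}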

Aggregations

CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile) 91
CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) 32
IOException (java.io.IOException) 24
FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory) 17
ArrayList (java.util.ArrayList) 14
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath) 11
HashMap (java.util.HashMap) 7
Path (org.apache.hadoop.fs.Path) 7
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails) 6
Map (java.util.Map) 5
Segment (org.apache.carbondata.core.datamap.Segment) 5
FileType (org.apache.carbondata.core.datastore.impl.FileFactory.FileType) 5
BlockIndex (org.apache.carbondata.format.BlockIndex) 5
HashSet (java.util.HashSet) 4
CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader) 4
SegmentIndexFileStore (org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) 3
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) 3
SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore) 3
SegmentUpdateDetails (org.apache.carbondata.core.mutate.SegmentUpdateDetails) 3
SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) 3