Search in sources :

Example 51 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class StreamSegment method size.

/**
 * Sums the file sizes recorded in the stream index file of a segment.
 *
 * @param segmentDir directory of the stream segment
 * @return the accumulated file size of all block index entries, or 0 when
 *         either the segment directory or its index file does not exist
 * @throws IOException propagated from the file system checks or the index reader
 */
public static long size(String segmentDir) throws IOException {
    // nothing to measure when the segment directory is missing
    if (!FileFactory.isFileExist(segmentDir)) {
        return 0;
    }
    String indexPath = CarbonTablePath.getCarbonStreamIndexFilePath(segmentDir);
    // a segment without an index file contributes no size
    if (!FileFactory.getCarbonFile(indexPath).exists()) {
        return 0;
    }
    long totalSize = 0;
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        reader.openThriftReader(indexPath);
        while (reader.hasNext()) {
            totalSize += reader.readBlockIndexInfo().getFile_size();
        }
    } finally {
        // always release the reader, even when reading fails part-way
        reader.closeThriftReader();
    }
    return totalSize;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockIndex(org.apache.carbondata.format.BlockIndex)

Example 52 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class StreamSegment method recoverFileIfRequired.

/**
 * Checks the health of a stream data file and tries to recover it after a task fault.
 * Per the original notes, this is invoked at the beginning of a data file writing task.
 *
 * <p>The index file is scanned for the entry whose file name equals {@code fileName}.
 * If that entry records a size of 0 the data file is deleted; if it records a size
 * smaller than the data file's current size the data file is truncated to the
 * recorded size. Nothing is done when either file is missing or no entry matches.
 *
 * @param segmentDir directory of the stream segment
 * @param fileName   name of the data file inside the segment directory
 * @param indexName  name of the index file inside the segment directory
 * @throws IOException propagated from the index reader or the file system calls
 */
public static void recoverFileIfRequired(String segmentDir, String fileName, String indexName) throws IOException {
    String filePath = segmentDir + File.separator + fileName;
    CarbonFile dataFile = FileFactory.getCarbonFile(filePath);
    String indexPath = segmentDir + File.separator + indexName;
    CarbonFile indexFile = FileFactory.getCarbonFile(indexPath);
    // recovery needs both the data file and the index file
    if (!dataFile.exists() || !indexFile.exists()) {
        return;
    }
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        reader.openThriftReader(indexPath);
        while (reader.hasNext()) {
            BlockIndex entry = reader.readBlockIndexInfo();
            if (!entry.getFile_name().equals(fileName)) {
                continue;
            }
            long recordedSize = entry.getFile_size();
            if (recordedSize == 0) {
                dataFile.delete();
            } else if (recordedSize < dataFile.getSize()) {
                // drop everything written beyond what the index recorded
                FileFactory.truncateFile(filePath, recordedSize);
            }
            // only the first matching entry is considered
            break;
        }
    } finally {
        reader.closeThriftReader();
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockIndex(org.apache.carbondata.format.BlockIndex)

Example 53 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class StreamSegment method readIndexFile.

/**
 * Reads every BlockIndex entry of an index file into a list.
 *
 * @param indexPath path of the index file
 * @return the BlockIndex entries in the order they are read; an empty list
 *         when the index file does not exist
 * @throws IOException failed to read index file
 */
public static List<BlockIndex> readIndexFile(String indexPath) throws IOException {
    List<BlockIndex> entries = new ArrayList<>();
    // a missing index file yields an empty result rather than an error
    if (!FileFactory.getCarbonFile(indexPath).exists()) {
        return entries;
    }
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        reader.openThriftReader(indexPath);
        while (reader.hasNext()) {
            BlockIndex entry = reader.readBlockIndexInfo();
            entries.add(entry);
        }
    } finally {
        // close the reader regardless of whether reading succeeded
        reader.closeThriftReader();
    }
    return entries;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) ArrayList(java.util.ArrayList) BlockIndex(org.apache.carbondata.format.BlockIndex)

Example 54 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class StreamSegment method recoverSegmentIfRequired.

/**
 * Checks the health of a stream segment and tries to recover it after a job fault.
 * This method will be invoked in the following scenarios:
 * 1. at the beginning of the streaming (StreamSinkFactory.getStreamSegmentId)
 * 2. after a job failed (CarbonAppendableStreamSink.writeDataFileJob)
 *
 * <p>When the index file is missing, every listed data file is deleted. Otherwise
 * each data file is compared with the size the index records for its name: files
 * with no entry or a recorded size of 0 are deleted, and files larger than their
 * recorded size are truncated to it.
 *
 * @param segmentDir directory of the stream segment
 * @throws IOException propagated from the index reader or the file system calls
 */
public static void recoverSegmentIfRequired(String segmentDir) throws IOException {
    if (!FileFactory.isFileExist(segmentDir)) {
        return;
    }
    String indexName = CarbonTablePath.getCarbonStreamIndexFileName();
    String indexPath = segmentDir + File.separator + indexName;
    CarbonFile indexFile = FileFactory.getCarbonFile(indexPath);
    CarbonFile[] dataFiles = listDataFiles(segmentDir);

    // without an index file, none of the data files can be validated
    if (!indexFile.exists()) {
        for (CarbonFile dataFile : dataFiles) {
            dataFile.delete();
        }
        return;
    }
    // index file exists but there is no data file to recover
    if (dataFiles.length == 0) {
        return;
    }

    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        reader.openThriftReader(indexPath);
        // map each indexed file name to the size the index recorded for it
        Map<String, Long> recordedSizes = new HashMap<>();
        while (reader.hasNext()) {
            BlockIndex entry = reader.readBlockIndexInfo();
            recordedSizes.put(entry.getFile_name(), entry.getFile_size());
        }
        // recover each data file against its recorded size
        for (CarbonFile dataFile : dataFiles) {
            Long recorded = recordedSizes.get(dataFile.getName());
            boolean notIndexedOrEmpty = recorded == null || recorded == 0;
            if (notIndexedOrEmpty) {
                dataFile.delete();
            } else if (recorded < dataFile.getSize()) {
                FileFactory.truncateFile(dataFile.getCanonicalPath(), recorded);
            }
        }
    } finally {
        reader.closeThriftReader();
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) HashMap(java.util.HashMap) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) BlockIndex(org.apache.carbondata.format.BlockIndex)

Example 55 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class CarbonSchemaReaderTest method testReadSchemaFromDataFile.

/**
 * Reads the schema from the first file under {@code path} whose name ends with
 * ".carbondata", restores the original column order, and checks that it has
 * 12 fields before delegating the detailed checks to {@code checkSchema}.
 * Any throwable raised along the way fails the test with its description.
 */
@Test
public void testReadSchemaFromDataFile() {
    try {
        CarbonFile[] carbonFiles = FileFactory.getCarbonFile(path).listFiles(new CarbonFileFilter() {

            @Override
            public boolean accept(CarbonFile file) {
                if (file == null) {
                    return false;
                }
                return file.getName().endsWith(".carbondata");
            }
        });
        if (carbonFiles == null || carbonFiles.length < 1) {
            throw new RuntimeException("Carbon data file not exists.");
        }
        String dataFilePath = carbonFiles[0].getAbsolutePath();
        Schema schema = CarbonSchemaReader.readSchema(dataFilePath).asOriginOrder();
        // expected value goes first so a failure message reports the values correctly
        assertEquals(12, schema.getFieldsLength());
        checkSchema(schema);
    } catch (Throwable e) {
        e.printStackTrace();
        // carry the cause into the test report instead of failing without a message
        Assert.fail("Reading schema from data file failed: " + e);
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) Test(org.junit.Test)

Aggregations

CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)161 IOException (java.io.IOException)47 CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)45 ArrayList (java.util.ArrayList)38 HashMap (java.util.HashMap)20 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)18 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)18 Path (org.apache.hadoop.fs.Path)15 List (java.util.List)11 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)11 Map (java.util.Map)10 HashSet (java.util.HashSet)9 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)9 LinkedList (java.util.LinkedList)6 BlockIndex (org.apache.carbondata.format.BlockIndex)6 Segment (org.apache.carbondata.core.index.Segment)5 CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader)5 Configuration (org.apache.hadoop.conf.Configuration)5 FileSystem (org.apache.hadoop.fs.FileSystem)5 Test (org.junit.Test)5