Search in sources :

Example 81 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

Source: class CarbonDataMergerUtil, method getSizeOfFactFileInLoad.

/**
 * Computes the combined size of every carbondata fact file under the
 * given segment folder.
 *
 * @param carbonFile segment directory whose children are scanned
 * @return total size in bytes of the carbondata files found
 */
private static long getSizeOfFactFileInLoad(CarbonFile carbonFile) {
    // Keep only carbondata files; index and other auxiliary files are skipped.
    CarbonFile[] dataFiles =
        carbonFile.listFiles(file -> CarbonTablePath.isCarbonDataFile(file.getName()));
    long totalSize = 0L;
    for (CarbonFile dataFile : dataFiles) {
        totalSize += dataFile.getSize();
    }
    return totalSize;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)

Example 82 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

Source: class CarbonTableReader, method parseCarbonMetadata.

/**
 * Reads the metadata of the given table and caches it in this.carbonCache
 * (the CarbonTableReader cache). Returns the existing cache entry when one
 * is already valid, so the schema file is read at most once per table.
 *
 * @param table schema and table name of the table to load
 * @param tablePath filesystem path of the table
 * @param config Hadoop configuration used for all file access
 * @return the CarbonTableCacheModel instance which contains all the needed metadata for a table
 * @throws RuntimeException wrapping any failure while reading or converting the schema
 */
private CarbonTableCacheModel parseCarbonMetadata(SchemaTableName table, String tablePath, Configuration config) {
    try {
        // Fast path: another call already populated a valid cache entry.
        CarbonTableCacheModel cache = getValidCacheBySchemaTableName(table);
        if (cache != null) {
            return cache;
        }
        // multiple tasks can be launched in a worker concurrently. Hence need to synchronize this.
        synchronized (this) {
            // cache might be filled by another thread, so if filled use that cache
            // (double-checked locking: re-check inside the lock).
            CarbonTableCacheModel cacheModel = getValidCacheBySchemaTableName(table);
            if (cacheModel != null) {
                return cacheModel;
            }
            // Step 1: get store path of the table and cache it.
            String schemaFilePath = CarbonTablePath.getSchemaFilePath(tablePath, config);
            // If metadata folder exists, it is a transactional table
            CarbonFile schemaFile = FileFactory.getCarbonFile(schemaFilePath, config);
            boolean isTransactionalTable = schemaFile.exists();
            org.apache.carbondata.format.TableInfo tableInfo;
            // Default the modified time to "now"; for transactional tables it is
            // replaced by the schema file's actual last-modified timestamp below.
            long modifiedTime = System.currentTimeMillis();
            if (isTransactionalTable) {
                // Step 2: read the metadata (tableInfo) of the table.
                ThriftReader.TBaseCreator createTBase = new ThriftReader.TBaseCreator() {

                    // TBase is used to read and write thrift objects.
                    // TableInfo is a kind of TBase used to read and write table information.
                    // TableInfo is generated by thrift,
                    // see schema.thrift under format/src/main/thrift for details.
                    public TBase create() {
                        return new org.apache.carbondata.format.TableInfo();
                    }
                };
                ThriftReader thriftReader = new ThriftReader(schemaFilePath, createTBase, config);
                thriftReader.open();
                tableInfo = (org.apache.carbondata.format.TableInfo) thriftReader.read();
                thriftReader.close();
                modifiedTime = schemaFile.getLastModifiedTime();
            } else {
                // Non-transactional table: no schema file on disk, so infer the
                // schema from the data files themselves.
                tableInfo = CarbonUtil.inferSchema(tablePath, table.getTableName(), false, config);
            }
            // Step 3: convert format level TableInfo to code level TableInfo
            SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
            // wrapperTableInfo is the code level information of a table in carbondata core,
            // different from the Thrift TableInfo.
            TableInfo wrapperTableInfo = schemaConverter.fromExternalToWrapperTableInfo(tableInfo, table.getSchemaName(), table.getTableName(), tablePath);
            wrapperTableInfo.setTransactionalTable(isTransactionalTable);
            // Drop any stale registration before loading the fresh metadata.
            CarbonMetadata.getInstance().removeTable(wrapperTableInfo.getTableUniqueName());
            // Step 4: Load metadata info into CarbonMetadata
            CarbonMetadata.getInstance().loadTableMetadata(wrapperTableInfo);
            CarbonTable carbonTable = Objects.requireNonNull(CarbonMetadata.getInstance().getCarbonTable(table.getSchemaName(), table.getTableName()), "carbontable is null");
            refreshIndexInfo(carbonTable, config);
            cache = new CarbonTableCacheModel(modifiedTime, carbonTable);
            // cache the table
            carbonCache.get().put(table, cache);
            cache.setCarbonTable(carbonTable);
        }
        return cache;
    } catch (Exception ex) {
        // Callers don't handle checked exceptions; rethrow with cause preserved.
        throw new RuntimeException(ex);
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) IOException(java.io.IOException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ThriftReader(org.apache.carbondata.core.reader.ThriftReader) SchemaConverter(org.apache.carbondata.core.metadata.converter.SchemaConverter) IndexTableInfo(org.apache.carbondata.core.metadata.schema.indextable.IndexTableInfo) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl)

Example 83 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

Source: class SegmentIndexFileStore, method getIndexFilesFromSegment.

/**
 * Collects the absolute paths of all index files in the given segment folder.
 * Entries that come out of a merge-index file are mapped to the merge file's
 * name; standalone index files are mapped to {@code null}.
 *
 * @param segmentPath path of the segment directory to scan
 * @return map of index-file absolute path to owning merge-file name (or null)
 * @throws IOException if an index or merge file cannot be read
 */
public Map<String, String> getIndexFilesFromSegment(String segmentPath) throws IOException {
    Map<String, String> indexFileToMergeFile = new HashMap<>();
    for (CarbonFile indexCandidate : getCarbonIndexFiles(segmentPath, FileFactory.getConfiguration())) {
        String candidateName = indexCandidate.getName();
        if (candidateName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
            // Expand a merge file into the index file names recorded inside it,
            // resolving each against the merge file's parent directory.
            String parentPath = indexCandidate.getParentFile().getAbsolutePath();
            for (String mergedName : getIndexFilesFromMergeFile(indexCandidate.getCanonicalPath())) {
                indexFileToMergeFile.put(
                    parentPath + CarbonCommonConstants.FILE_SEPARATOR + mergedName, candidateName);
            }
        } else if (candidateName.endsWith(CarbonTablePath.INDEX_FILE_EXT)) {
            indexFileToMergeFile.put(indexCandidate.getAbsolutePath(), null);
        }
    }
    return indexFileToMergeFile;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) HashMap(java.util.HashMap)

Example 84 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

Source: class SegmentIndexFileStore, method readAllIIndexOfSegment.

/**
 * Reads every index (and merge-index) file referenced by the given segment
 * file and caches their contents in this store.
 *
 * @param segmentFile segment file describing the index-file locations; no-op if null
 * @param tablePath table path used to resolve relative locations
 * @param status only locations with this status are read
 * @param ignoreStatus when true, read locations regardless of their status
 * @throws IOException if an index or merge file cannot be read
 */
public void readAllIIndexOfSegment(SegmentFileStore.SegmentFile segmentFile, String tablePath, SegmentStatus status, boolean ignoreStatus) throws IOException {
    if (segmentFile == null) {
        return;
    }
    // Deduplicate by absolute path: Set.add returns false for paths seen before.
    Set<String> seenPaths = new HashSet<>();
    List<CarbonFile> filesToRead = new ArrayList<>();
    for (Map.Entry<String, SegmentFileStore.FolderDetails> entry : segmentFile.getLocationMap().entrySet()) {
        SegmentFileStore.FolderDetails details = entry.getValue();
        if (!(details.getStatus().equals(status.getMessage()) || ignoreStatus)) {
            continue;
        }
        String basePath = entry.getKey();
        if (details.isRelative()) {
            basePath = tablePath + CarbonCommonConstants.FILE_SEPARATOR + basePath;
        }
        String mergeFileName = details.getMergeFileName();
        if (mergeFileName != null) {
            CarbonFile mergeFile = FileFactory.getCarbonFile(
                basePath + CarbonCommonConstants.FILE_SEPARATOR + mergeFileName);
            if (mergeFile.exists() && seenPaths.add(mergeFile.getAbsolutePath())) {
                filesToRead.add(mergeFile);
            }
        }
        for (String indexFileName : details.getFiles()) {
            CarbonFile indexFile = FileFactory.getCarbonFile(
                basePath + CarbonCommonConstants.FILE_SEPARATOR + indexFileName);
            if (indexFile.exists() && seenPaths.add(indexFile.getAbsolutePath())) {
                filesToRead.add(indexFile);
            }
        }
    }
    // Second pass: dispatch each collected file to the matching reader.
    for (CarbonFile fileToRead : filesToRead) {
        String fileName = fileToRead.getName();
        if (fileName.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
            readMergeFile(fileToRead.getCanonicalPath());
        } else if (fileName.endsWith(CarbonTablePath.INDEX_FILE_EXT)) {
            readIndexFile(fileToRead);
        }
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet)

Example 85 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

Source: class SegmentIndexFileStore, method readMergeFile.

/**
 * Reads a carbonindexmerge file and updates the in-memory index caches:
 * the merge-file-to-index-names map, the per-index-file data map, and the
 * full-path data map.
 *
 * @param mergeFilePath path of the merge index file to read
 * @throws IOException if the file cannot be read or its header and data
 *         sections are inconsistent
 */
public void readMergeFile(String mergeFilePath) throws IOException {
    ThriftReader thriftReader = new ThriftReader(mergeFilePath, configuration);
    try {
        thriftReader.open();
        // A merge file is a header listing the merged index file names, followed
        // by one data block per listed file.
        MergedBlockIndexHeader indexHeader = readMergeBlockIndexHeader(thriftReader);
        MergedBlockIndex mergedBlockIndex = readMergeBlockIndex(thriftReader);
        List<String> file_names = indexHeader.getFile_names();
        carbonMergeFileToIndexFilesMap.put(mergeFilePath, file_names);
        List<ByteBuffer> fileData = mergedBlockIndex.getFileData();
        CarbonFile mergeFile = FileFactory.getCarbonFile(mergeFilePath, configuration);
        String mergeFileAbsolutePath = mergeFile.getParentFile().getAbsolutePath();
        // Explicit check instead of `assert`: assertions are disabled unless the
        // JVM runs with -ea, so a corrupt merge file would otherwise surface as an
        // obscure IndexOutOfBoundsException (or silently truncated data) below.
        if (file_names.size() != fileData.size()) {
            throw new IOException("Corrupt merge index file " + mergeFilePath + ": header lists "
                + file_names.size() + " file names but " + fileData.size() + " data blocks");
        }
        for (int i = 0; i < file_names.size(); i++) {
            byte[] data = fileData.get(i).array();
            carbonIndexMap.put(file_names.get(i), data);
            carbonIndexMapWithFullPath.put(mergeFileAbsolutePath + CarbonCommonConstants.FILE_SEPARATOR + file_names.get(i), data);
        }
    } finally {
        // Always release the reader, even when parsing fails.
        thriftReader.close();
    }
}
Also used : ThriftReader(org.apache.carbondata.core.reader.ThriftReader) MergedBlockIndex(org.apache.carbondata.format.MergedBlockIndex) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) MergedBlockIndexHeader(org.apache.carbondata.format.MergedBlockIndexHeader) ByteBuffer(java.nio.ByteBuffer)

Aggregations

CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)161 IOException (java.io.IOException)47 CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)45 ArrayList (java.util.ArrayList)38 HashMap (java.util.HashMap)20 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)18 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)18 Path (org.apache.hadoop.fs.Path)15 List (java.util.List)11 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)11 Map (java.util.Map)10 HashSet (java.util.HashSet)9 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)9 LinkedList (java.util.LinkedList)6 BlockIndex (org.apache.carbondata.format.BlockIndex)6 Segment (org.apache.carbondata.core.index.Segment)5 CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader)5 Configuration (org.apache.hadoop.conf.Configuration)5 FileSystem (org.apache.hadoop.fs.FileSystem)5 Test (org.junit.Test)5