use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class CarbonDataMergerUtil method getSizeOfFactFileInLoad.
/**
 * Returns the combined size of all the carbondata files present in the segment.
 *
 * @param carbonFile segment folder to scan for carbondata files
 * @return total size in bytes of the segment's carbondata files
 */
private static long getSizeOfFactFileInLoad(CarbonFile carbonFile) {
  long factSize = 0;
  // consider only the carbondata files in the segment folder.
  CarbonFile[] factFile = carbonFile.listFiles(new CarbonFileFilter() {
    @Override
    public boolean accept(CarbonFile file) {
      return CarbonTablePath.isCarbonDataFile(file.getName());
    }
  });
  for (CarbonFile fact : factFile) {
    factSize += fact.getSize();
  }
  return factSize;
}
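The listFiles/CarbonFileFilter pattern above is reusable outside CarbonDataMergerUtil. Below is a minimal, self-contained sketch of the same computation; the segment path is hypothetical and would need to point at a real segment folder.
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.util.path.CarbonTablePath;

public class SegmentSizeSketch {
  public static void main(String[] args) {
    // hypothetical segment folder; replace with a real one.
    CarbonFile segmentDir = FileFactory.getCarbonFile("/tmp/store/db/tbl/Fact/Part0/Segment_0");
    CarbonFile[] dataFiles = segmentDir.listFiles(new CarbonFileFilter() {
      @Override
      public boolean accept(CarbonFile file) {
        // keep only .carbondata files, exactly as getSizeOfFactFileInLoad does.
        return CarbonTablePath.isCarbonDataFile(file.getName());
      }
    });
    long totalBytes = 0;
    for (CarbonFile dataFile : dataFiles) {
      totalBytes += dataFile.getSize();
    }
    System.out.println("segment data size: " + totalBytes + " bytes");
  }
}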
use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class CarbonTableReader method parseCarbonMetadata.
/**
 * Read the metadata of the given table
 * and cache it in this.carbonCache (CarbonTableReader cache).
 *
 * @param table schema and table name of the given table.
 * @param tablePath store path of the table.
 * @param config Hadoop configuration used to access the store.
 * @return the CarbonTableCacheModel instance which contains all the needed metadata for a table.
 */
private CarbonTableCacheModel parseCarbonMetadata(SchemaTableName table, String tablePath,
    Configuration config) {
  try {
    CarbonTableCacheModel cache = getValidCacheBySchemaTableName(table);
    if (cache != null) {
      return cache;
    }
    // multiple tasks can run concurrently in a worker, hence the synchronization.
    synchronized (this) {
      // the cache might have been filled by another thread in the meantime; if so, use it.
      CarbonTableCacheModel cacheModel = getValidCacheBySchemaTableName(table);
      if (cacheModel != null) {
        return cacheModel;
      }
      // Step 1: get the schema file path of the table.
      String schemaFilePath = CarbonTablePath.getSchemaFilePath(tablePath, config);
      // if the metadata folder exists, it is a transactional table.
      CarbonFile schemaFile = FileFactory.getCarbonFile(schemaFilePath, config);
      boolean isTransactionalTable = schemaFile.exists();
      org.apache.carbondata.format.TableInfo tableInfo;
      long modifiedTime = System.currentTimeMillis();
      if (isTransactionalTable) {
        // Step 2: read the metadata (tableInfo) of the table from the schema file.
        ThriftReader.TBaseCreator createTBase = new ThriftReader.TBaseCreator() {
          // TBase is used to read and write thrift objects.
          // TableInfo is a kind of TBase used to read and write table information.
          // TableInfo is generated by thrift;
          // see schema.thrift under format/src/main/thrift for details.
          public TBase create() {
            return new org.apache.carbondata.format.TableInfo();
          }
        };
        ThriftReader thriftReader = new ThriftReader(schemaFilePath, createTBase, config);
        thriftReader.open();
        tableInfo = (org.apache.carbondata.format.TableInfo) thriftReader.read();
        thriftReader.close();
        modifiedTime = schemaFile.getLastModifiedTime();
      } else {
        // non-transactional table: infer the schema from the data files.
        tableInfo = CarbonUtil.inferSchema(tablePath, table.getTableName(), false, config);
      }
      // Step 3: convert the format-level (Thrift) TableInfo to the core-level TableInfo.
      SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
      // wrapperTableInfo is the core-level representation of a table in carbondata,
      // different from the Thrift TableInfo.
      TableInfo wrapperTableInfo = schemaConverter.fromExternalToWrapperTableInfo(
          tableInfo, table.getSchemaName(), table.getTableName(), tablePath);
      wrapperTableInfo.setTransactionalTable(isTransactionalTable);
      CarbonMetadata.getInstance().removeTable(wrapperTableInfo.getTableUniqueName());
      // Step 4: load the metadata info into CarbonMetadata.
      CarbonMetadata.getInstance().loadTableMetadata(wrapperTableInfo);
      CarbonTable carbonTable = Objects.requireNonNull(
          CarbonMetadata.getInstance().getCarbonTable(table.getSchemaName(), table.getTableName()),
          "carbontable is null");
      refreshIndexInfo(carbonTable, config);
      cache = new CarbonTableCacheModel(modifiedTime, carbonTable);
      // cache the table
      carbonCache.get().put(table, cache);
      cache.setCarbonTable(carbonTable);
    }
    return cache;
  } catch (Exception ex) {
    throw new RuntimeException(ex);
  }
}
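The Step 2 pattern above can be exercised on its own. The following sketch deserializes a schema file into the Thrift-generated TableInfo, using the same three-argument ThriftReader constructor as the excerpt; the path is hypothetical, and a real transactional table keeps this file under its Metadata folder.
import java.io.IOException;
import org.apache.carbondata.core.reader.ThriftReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.thrift.TBase;

public class SchemaReadSketch {
  public static void main(String[] args) throws IOException {
    // hypothetical schema file path of a transactional table.
    String schemaFilePath = "/tmp/store/db/tbl/Metadata/schema";
    ThriftReader reader = new ThriftReader(schemaFilePath, new ThriftReader.TBaseCreator() {
      @Override
      public TBase create() {
        // Thrift-generated class; see schema.thrift under format/src/main/thrift.
        return new org.apache.carbondata.format.TableInfo();
      }
    }, new Configuration());
    reader.open();
    org.apache.carbondata.format.TableInfo tableInfo =
        (org.apache.carbondata.format.TableInfo) reader.read();
    reader.close();
    System.out.println(tableInfo);
  }
}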
use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class SegmentIndexFileStore method getIndexFilesFromSegment.
/**
 * Read all index file names of the segment.
 *
 * @param segmentPath path of the segment folder to scan
 * @return map of index file path to the merge file name that contains it
 *         (null for standalone index files)
 * @throws IOException if a merge index file cannot be read
 */
public Map<String, String> getIndexFilesFromSegment(String segmentPath) throws IOException {
  CarbonFile[] carbonIndexFiles = getCarbonIndexFiles(segmentPath, FileFactory.getConfiguration());
  Map<String, String> indexFiles = new HashMap<>();
  for (CarbonFile carbonIndexFile : carbonIndexFiles) {
    if (carbonIndexFile.getName().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
      List<String> indexFilesFromMergeFile =
          getIndexFilesFromMergeFile(carbonIndexFile.getCanonicalPath());
      for (String file : indexFilesFromMergeFile) {
        indexFiles.put(carbonIndexFile.getParentFile().getAbsolutePath()
            + CarbonCommonConstants.FILE_SEPARATOR + file, carbonIndexFile.getName());
      }
    } else if (carbonIndexFile.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT)) {
      indexFiles.put(carbonIndexFile.getAbsolutePath(), null);
    }
  }
  return indexFiles;
}
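A minimal usage sketch for this method, assuming SegmentIndexFileStore offers a no-argument constructor (as in recent CarbonData versions); the segment path is hypothetical.
import java.io.IOException;
import java.util.Map;
import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;

public class ListIndexFilesSketch {
  public static void main(String[] args) throws IOException {
    SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
    // hypothetical segment folder; replace with a real one.
    Map<String, String> indexFiles =
        indexFileStore.getIndexFilesFromSegment("/tmp/store/db/tbl/Fact/Part0/Segment_0");
    for (Map.Entry<String, String> entry : indexFiles.entrySet()) {
      // the value is the enclosing merge file name, or null for a standalone index file.
      System.out.println(entry.getKey() + " <- " + entry.getValue());
    }
  }
}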
use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class SegmentIndexFileStore method readAllIIndexOfSegment.
/**
 * Read all the index files of the segment and cache their contents in memory.
 *
 * @param segmentFile segment file describing the locations of the index files
 * @param tablePath path of the table, used to resolve relative locations
 * @param status only locations with this status are read
 * @param ignoreStatus if true, read index files from all locations regardless of status
 * @throws IOException if an index or merge file cannot be read
 */
public void readAllIIndexOfSegment(SegmentFileStore.SegmentFile segmentFile, String tablePath,
    SegmentStatus status, boolean ignoreStatus) throws IOException {
  List<CarbonFile> carbonIndexFiles = new ArrayList<>();
  Set<String> indexFiles = new HashSet<>();
  if (segmentFile == null) {
    return;
  }
  for (Map.Entry<String, SegmentFileStore.FolderDetails> locations :
      segmentFile.getLocationMap().entrySet()) {
    String location = locations.getKey();
    if (locations.getValue().getStatus().equals(status.getMessage()) || ignoreStatus) {
      if (locations.getValue().isRelative()) {
        location = tablePath + CarbonCommonConstants.FILE_SEPARATOR + location;
      }
      String mergeFileName = locations.getValue().getMergeFileName();
      if (mergeFileName != null) {
        CarbonFile mergeFile = FileFactory.getCarbonFile(
            location + CarbonCommonConstants.FILE_SEPARATOR + mergeFileName);
        if (mergeFile.exists() && !indexFiles.contains(mergeFile.getAbsolutePath())) {
          carbonIndexFiles.add(mergeFile);
          indexFiles.add(mergeFile.getAbsolutePath());
        }
      }
      for (String indexFile : locations.getValue().getFiles()) {
        CarbonFile carbonFile = FileFactory.getCarbonFile(
            location + CarbonCommonConstants.FILE_SEPARATOR + indexFile);
        if (carbonFile.exists() && !indexFiles.contains(carbonFile.getAbsolutePath())) {
          carbonIndexFiles.add(carbonFile);
          indexFiles.add(carbonFile.getAbsolutePath());
        }
      }
    }
  }
  for (CarbonFile carbonIndexFile : carbonIndexFiles) {
    if (carbonIndexFile.getName().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
      readMergeFile(carbonIndexFile.getCanonicalPath());
    } else if (carbonIndexFile.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT)) {
      readIndexFile(carbonIndexFile);
    }
  }
}
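Driving this method requires a deserialized SegmentFile. Below is a sketch under the assumption that the static SegmentFileStore.readSegmentFile loads one from disk; both paths are hypothetical.
import java.io.IOException;
import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
import org.apache.carbondata.core.metadata.SegmentFileStore;
import org.apache.carbondata.core.statusmanager.SegmentStatus;

public class ReadSegmentIndexesSketch {
  public static void main(String[] args) throws IOException {
    String tablePath = "/tmp/store/db/tbl";
    // hypothetical segment file written under <tablePath>/Metadata/segments.
    SegmentFileStore.SegmentFile segmentFile =
        SegmentFileStore.readSegmentFile(tablePath + "/Metadata/segments/0_1.segment");
    SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
    // cache index files from locations marked SUCCESS; pass true to ignore the status check.
    indexFileStore.readAllIIndexOfSegment(segmentFile, tablePath, SegmentStatus.SUCCESS, false);
  }
}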
use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class SegmentIndexFileStore method readMergeFile.
/**
 * Read a carbonindexmerge file and update the in-memory index maps.
 *
 * @param mergeFilePath path of the merge index file to read
 * @throws IOException if the merge file cannot be read
 */
public void readMergeFile(String mergeFilePath) throws IOException {
  ThriftReader thriftReader = new ThriftReader(mergeFilePath, configuration);
  try {
    thriftReader.open();
    MergedBlockIndexHeader indexHeader = readMergeBlockIndexHeader(thriftReader);
    MergedBlockIndex mergedBlockIndex = readMergeBlockIndex(thriftReader);
    // the header lists the original index file names; the body carries their contents.
    List<String> file_names = indexHeader.getFile_names();
    carbonMergeFileToIndexFilesMap.put(mergeFilePath, file_names);
    List<ByteBuffer> fileData = mergedBlockIndex.getFileData();
    CarbonFile mergeFile = FileFactory.getCarbonFile(mergeFilePath, configuration);
    String mergeFileAbsolutePath = mergeFile.getParentFile().getAbsolutePath();
    assert (file_names.size() == fileData.size());
    for (int i = 0; i < file_names.size(); i++) {
      byte[] data = fileData.get(i).array();
      // cache each index file's bytes, keyed both by plain name and by full path.
      carbonIndexMap.put(file_names.get(i), data);
      carbonIndexMapWithFullPath.put(
          mergeFileAbsolutePath + CarbonCommonConstants.FILE_SEPARATOR + file_names.get(i), data);
    }
  } finally {
    thriftReader.close();
  }
}
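Once readMergeFile has run, index contents are served from the in-memory maps. The sketch below assumes the store exposes them through a getFileData accessor (the method name may vary by version, and both the merge file path and the index file name are hypothetical).
import java.io.IOException;
import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;

public class ReadMergeFileSketch {
  public static void main(String[] args) throws IOException {
    SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
    // hypothetical merge index file inside a segment folder.
    indexFileStore.readMergeFile("/tmp/store/db/tbl/Fact/Part0/Segment_0/0_1.carbonindexmerge");
    // assumed accessor: returns the cached bytes of one original .carbonindex file.
    byte[] indexBytes = indexFileStore.getFileData("part-0-0_batchno0-0-1.carbonindex");
    System.out.println("cached index bytes: " + (indexBytes == null ? 0 : indexBytes.length));
  }
}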