Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class SegmentIndexFileStore, method getMergeOrIndexFilesFromSegment.
/**
 * Reads all index file names of the segment.
 *
 * @param segmentPath path of the segment directory
 * @return a map whose keys are index or merge index file paths; the value repeats the
 *         path for merge index files and is null for plain index files
 */
public Map<String, String> getMergeOrIndexFilesFromSegment(String segmentPath) {
  CarbonFile[] carbonIndexFiles =
      getCarbonIndexFiles(segmentPath, FileFactory.getConfiguration());
  Map<String, String> indexFiles = new HashMap<>();
  for (CarbonFile carbonIndexFile : carbonIndexFiles) {
    if (carbonIndexFile.getName().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
      indexFiles.put(carbonIndexFile.getAbsolutePath(), carbonIndexFile.getAbsolutePath());
    } else if (carbonIndexFile.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT)) {
      indexFiles.put(carbonIndexFile.getAbsolutePath(), null);
    }
  }
  return indexFiles;
}
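For context, a minimal sketch of how this method might be invoked. It assumes SegmentIndexFileStore has a no-arg constructor (present in recent CarbonData versions) and uses a hypothetical segment path; per the method above, a null map value marks a plain .carbonindex file, while a non-null value points at its merge index.

import java.util.Map;

import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;

public class ListSegmentIndexFiles {
  public static void main(String[] args) {
    // hypothetical segment path; adjust to your store layout
    String segmentPath = "/store/default/my_table/Fact/Part0/Segment_0";
    SegmentIndexFileStore fileStore = new SegmentIndexFileStore();
    Map<String, String> indexFiles = fileStore.getMergeOrIndexFilesFromSegment(segmentPath);
    for (Map.Entry<String, String> entry : indexFiles.entrySet()) {
      // value is null for a plain index file, the merge file path otherwise
      System.out.println(entry.getKey() + " -> " + entry.getValue());
    }
  }
}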
Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class CarbonFileInputFormat, method getSplits.
/**
 * Gets the list of blocks/blocklets and wraps them into CarbonInputSplits.
 *
 * @param job JobContext with Configuration
 * @return list of CarbonInputSplit
 * @throws IOException if the schema file is missing or corrupt
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
  if (null == carbonTable) {
    throw new IOException("Missing/Corrupt schema file for table.");
  }
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  // get all valid segments and set them into the configuration;
  // check for the external table segment (Segment_null)
  ReadCommittedScope readCommittedScope;
  if (carbonTable.isTransactionalTable()) {
    readCommittedScope = new LatestFilesReadCommittedScope(
        identifier.getTablePath() + "/Fact/Part0/Segment_null/", job.getConfiguration());
  } else {
    readCommittedScope = getReadCommittedScope(job.getConfiguration());
    if (readCommittedScope == null) {
      readCommittedScope =
          new LatestFilesReadCommittedScope(identifier.getTablePath(), job.getConfiguration());
    } else {
      readCommittedScope.setConfiguration(job.getConfiguration());
    }
  }
  // this will be null in case of a corrupt schema file
  IndexFilter filter = getFilterPredicates(job.getConfiguration());
  // if external table segments are found, add them to the list
  List<Segment> externalTableSegments = new ArrayList<>();
  Segment seg;
  if (carbonTable.isTransactionalTable()) {
    // In some cases the SDK writes into the segment path instead of the table path,
    // i.e. inside "Fact/Part0/Segment_null", and the segment is named "null".
    // Such a table is treated as transactional by default and goes through
    // CarbonFileInputFormat; that scenario is handled here.
    seg = new Segment("null", null, readCommittedScope);
    externalTableSegments.add(seg);
  } else {
    LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
    for (LoadMetadataDetails load : loadMetadataDetails) {
      seg = new Segment(load.getLoadName(), null, readCommittedScope);
      if (fileLists != null) {
        for (Object fileList : fileLists) {
          String timestamp =
              CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileList.toString());
          if (timestamp.equals(seg.getSegmentNo())) {
            externalTableSegments.add(seg);
            break;
          }
        }
      } else {
        externalTableSegments.add(seg);
      }
    }
  }
  List<InputSplit> splits = new ArrayList<>();
  boolean useBlockIndex = job.getConfiguration().getBoolean("filter_blocks", true);
  // resolve the filter expression before pruning
  if (filter != null) {
    filter.resolve(false);
  }
  if (useBlockIndex) {
    // do block filtering and get splits
    splits = getSplits(job, filter, externalTableSegments);
  } else {
    List<CarbonFile> carbonFiles;
    if (null != this.fileLists) {
      carbonFiles = getAllCarbonDataFiles(this.fileLists);
    } else {
      carbonFiles = getAllCarbonDataFiles(carbonTable.getTablePath());
    }
    List<String> allDeleteDeltaFiles = getAllDeleteDeltaFiles(carbonTable.getTablePath());
    for (CarbonFile carbonFile : carbonFiles) {
      // The segment id is set to null because the SDK does not write carbondata files
      // with respect to segments, so no specific segment name exists for this load.
      CarbonInputSplit split = new CarbonInputSplit("null", carbonFile.getAbsolutePath(), 0,
          carbonFile.getLength(), carbonFile.getLocations(), FileFormat.COLUMNAR_V3);
      split.setVersion(ColumnarFormatVersion.V3);
      BlockletDetailInfo info = new BlockletDetailInfo();
      split.setDetailInfo(info);
      info.setBlockSize(carbonFile.getLength());
      info.setVersionNumber(split.getVersion().number());
      info.setUseMinMaxForPruning(false);
      if (CollectionUtils.isNotEmpty(allDeleteDeltaFiles)) {
        split.setDeleteDeltaFiles(
            getDeleteDeltaFiles(carbonFile.getAbsolutePath(), allDeleteDeltaFiles));
      }
      splits.add(split);
    }
    splits.sort(Comparator.comparing(o -> ((CarbonInputSplit) o).getFilePath()));
  }
  setAllColumnProjectionIfNotConfigured(job, carbonTable);
  return splits;
}
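As a usage sketch only, not the project's documented API surface: the input format can be driven from a plain MapReduce job context. The table configuration step is deliberately elided because the exact configuration keys vary between CarbonData versions; assume the caller has already prepared the Configuration the way a CarbonData reader normally does.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;

import org.apache.carbondata.hadoop.api.CarbonFileInputFormat;

public class PlanSplits {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // assumption: table path, schema and projection have already been set on conf
    // by the caller (the exact configuration keys differ across versions)
    Job job = Job.getInstance(conf);
    CarbonFileInputFormat<Object> format = new CarbonFileInputFormat<>();
    List<InputSplit> splits = format.getSplits(job);
    System.out.println("planned " + splits.size() + " splits");
  }
}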
Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class LuceneFineGrainIndex, method init.
/**
 * Called to load the index into memory or to initialize it.
 */
public void init(IndexModel indexModel) throws IOException {
  long startTime = System.currentTimeMillis();
  // derive the index path from the file path
  Path indexPath = FileFactory.getPath(indexModel.getFilePath());
  LOGGER.info("Lucene index read path " + indexPath.toString());
  this.filePath = indexPath.getName();
  this.indexSearcherMap = new HashMap<>();
  // get the file system; HDFS is used here, as realized in the Solr project
  CarbonFile indexFilePath = FileFactory.getCarbonFile(indexPath.toString());
  // check whether this path is valid
  if (!indexFilePath.exists()) {
    String errorMessage = String.format("index directory %s does not exist.", indexPath);
    LOGGER.error(errorMessage);
    throw new IOException(errorMessage);
  }
  if (!indexFilePath.isDirectory()) {
    String errorMessage = String.format("invalid index path %s, must be a directory", indexPath);
    LOGGER.error(errorMessage);
    throw new IOException(errorMessage);
  }
  if (storeBlockletWise) {
    // one Lucene index directory per blocklet, keyed by blocklet id
    CarbonFile[] blockletDirs = indexFilePath.listFiles();
    for (CarbonFile blockletDir : blockletDirs) {
      IndexSearcher indexSearcher = createIndexSearcher(new Path(blockletDir.getAbsolutePath()));
      indexSearcherMap.put(blockletDir.getName(), indexSearcher);
    }
  } else {
    // a single searcher for the whole segment, stored under the sentinel key "-1"
    IndexSearcher indexSearcher = createIndexSearcher(indexPath);
    indexSearcherMap.put("-1", indexSearcher);
  }
  LOGGER.info("Time taken to initialize lucene searcher: "
      + (System.currentTimeMillis() - startTime));
}
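The existence/directory validation pattern above is reusable on its own. Below is a minimal standalone sketch of it, using only the CarbonFile calls already shown in this snippet; ValidateIndexDir and its argument are hypothetical names.

import java.io.IOException;

import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;

public class ValidateIndexDir {
  // fails fast if the path is missing or not a directory, then lists its children
  public static void validate(String path) throws IOException {
    CarbonFile dir = FileFactory.getCarbonFile(path);
    if (!dir.exists() || !dir.isDirectory()) {
      throw new IOException("invalid lucene index directory: " + path);
    }
    for (CarbonFile child : dir.listFiles()) {
      System.out.println("blocklet dir: " + child.getName());
    }
  }
}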
Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class BloomCoarseGrainIndexFactory, method deleteSegmentIndexData.
@Override
public void deleteSegmentIndexData(String segmentId) throws IOException {
  try {
    String indexPath = CarbonTablePath.getIndexesStorePath(
        getCarbonTable().getTablePath(), segmentId, indexName);
    if (FileFactory.isFileExist(indexPath)) {
      CarbonFile file = FileFactory.getCarbonFile(indexPath);
      CarbonUtil.deleteFoldersAndFilesSilent(file);
    }
    clear(segmentId);
  } catch (InterruptedException ex) {
    throw new IOException("Failed to delete index for segment_" + segmentId);
  }
}
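A minimal, self-contained sketch of the same check-then-delete pattern, using only the FileFactory and CarbonUtil calls that appear above. SilentDelete is a hypothetical helper; unlike the original, it also restores the thread's interrupt flag before rethrowing.

import java.io.IOException;

import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.util.CarbonUtil;

public class SilentDelete {
  // deletes a directory tree if present, mapping InterruptedException to IOException
  public static void deleteIfExists(String path) throws IOException {
    try {
      if (FileFactory.isFileExist(path)) {
        CarbonFile file = FileFactory.getCarbonFile(path);
        CarbonUtil.deleteFoldersAndFilesSilent(file);
      }
    } catch (InterruptedException ex) {
      Thread.currentThread().interrupt();
      throw new IOException("interrupted while deleting " + path, ex);
    }
  }
}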
Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class CarbonDictionarySortIndexWriterImpl, method cleanUpOldSortIndex.
/**
 * Cleans up old unused sort index files.
 *
 * @param carbonTablePath carbon table path
 * @param dictPath dictionary file path
 */
protected void cleanUpOldSortIndex(CarbonTablePath carbonTablePath, String dictPath) {
  CarbonFile dictFile = FileFactory.getCarbonFile(dictPath, FileFactory.getFileType(dictPath));
  CarbonFile[] files =
      carbonTablePath.getSortIndexFiles(dictFile.getParentFile(), columnIdentifier.getColumnId());
  int maxTime;
  try {
    maxTime = Integer.parseInt(CarbonProperties.getInstance()
        .getProperty(CarbonCommonConstants.MAX_QUERY_EXECUTION_TIME));
  } catch (NumberFormatException e) {
    maxTime = CarbonCommonConstants.DEFAULT_MAX_QUERY_EXECUTION_TIME;
  }
  if (null != files) {
    Arrays.sort(files, new Comparator<CarbonFile>() {
      @Override
      public int compare(CarbonFile o1, CarbonFile o2) {
        return o1.getName().compareTo(o2.getName());
      }
    });
    // skip the last file so the most recent sort index is always retained
    for (int i = 0; i < files.length - 1; i++) {
      long difference = System.currentTimeMillis() - files[i].getLastModifiedTime();
      long minutesElapsed = (difference / (1000 * 60));
      if (minutesElapsed > maxTime) {
        if (!files[i].delete()) {
          LOGGER.warn("Failed to delete sort index file: " + files[i].getAbsolutePath());
        } else {
          LOGGER.info("Sort index file deleted: " + files[i].getAbsolutePath());
        }
      }
    }
  }
}
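The age check inside the loop above can be isolated into a small helper. A sketch assuming only CarbonFile.getLastModifiedTime() (used above); StaleFileCheck is a hypothetical class name, and TimeUnit replaces the manual millisecond arithmetic.

import java.util.concurrent.TimeUnit;

import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;

public class StaleFileCheck {
  // returns true when the file's last modification is older than maxAgeMinutes
  public static boolean isStale(String path, long maxAgeMinutes) {
    CarbonFile file = FileFactory.getCarbonFile(path);
    long elapsedMinutes = TimeUnit.MILLISECONDS.toMinutes(
        System.currentTimeMillis() - file.getLastModifiedTime());
    return elapsedMinutes > maxAgeMinutes;
  }
}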