Example 86 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class SegmentIndexFileStore method getMergeOrIndexFilesFromSegment.

/**
 * Reads all index file names of the segment.
 *
 * @param segmentPath path of the segment directory to scan
 * @return map keyed by the absolute path of each index file; the value is the
 *         merge file's own path for .carbonindexmerge files and null for plain
 *         .carbonindex files
 */
public Map<String, String> getMergeOrIndexFilesFromSegment(String segmentPath) {
    CarbonFile[] carbonIndexFiles = getCarbonIndexFiles(segmentPath, FileFactory.getConfiguration());
    Map<String, String> indexFiles = new HashMap<>();
    for (CarbonFile carbonIndexFile : carbonIndexFiles) {
        if (carbonIndexFile.getName().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
            indexFiles.put(carbonIndexFile.getAbsolutePath(), carbonIndexFile.getAbsolutePath());
        } else if (carbonIndexFile.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT)) {
            indexFiles.put(carbonIndexFile.getAbsolutePath(), null);
        }
    }
    return indexFiles;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) HashMap(java.util.HashMap)
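
A minimal caller sketch for the method above, assuming a local store layout and that SegmentIndexFileStore (from org.apache.carbondata.core.indexstore.blockletindex) offers a no-argument constructor; the segment path is hypothetical. A null map value marks a plain .carbonindex file, while a merge file maps to its own path.

import java.util.Map;
import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;

public class SegmentIndexListing {
    public static void main(String[] args) throws Exception {
        // Hypothetical segment path; point this at a real store location.
        String segmentPath = "/tmp/store/default/t1/Fact/Part0/Segment_0";
        SegmentIndexFileStore store = new SegmentIndexFileStore();
        Map<String, String> indexFiles = store.getMergeOrIndexFilesFromSegment(segmentPath);
        for (Map.Entry<String, String> entry : indexFiles.entrySet()) {
            // value == null -> plain .carbonindex file;
            // value != null -> .carbonindexmerge file (key and value are the same path)
            System.out.println(entry.getKey() + " merged=" + (entry.getValue() != null));
        }
    }
}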

Example 87 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class CarbonFileInputFormat method getSplits.

/**
 * Gets the list of blocks/blocklets and converts them into CarbonInputSplits.
 * @param job JobContext with Configuration
 * @return list of CarbonInputSplit
 * @throws IOException if the table schema file is missing or corrupt
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    if (null == carbonTable) {
        throw new IOException("Missing/Corrupt schema file for table.");
    }
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    // get all valid segments and set them into the configuration
    // check for externalTable segment (Segment_null)
    // process and resolve the expression
    ReadCommittedScope readCommittedScope;
    if (carbonTable.isTransactionalTable()) {
        readCommittedScope = new LatestFilesReadCommittedScope(identifier.getTablePath() + "/Fact/Part0/Segment_null/", job.getConfiguration());
    } else {
        readCommittedScope = getReadCommittedScope(job.getConfiguration());
        if (readCommittedScope == null) {
            readCommittedScope = new LatestFilesReadCommittedScope(identifier.getTablePath(), job.getConfiguration());
        } else {
            readCommittedScope.setConfiguration(job.getConfiguration());
        }
    }
    // this will be null in case of corrupt schema file.
    IndexFilter filter = getFilterPredicates(job.getConfiguration());
    // if external table Segments are found, add it to the List
    List<Segment> externalTableSegments = new ArrayList<>();
    Segment seg;
    if (carbonTable.isTransactionalTable()) {
        // In some cases the SDK writes into the segment path instead of the table
        // path, i.e. inside "Fact/Part0/Segment_null"; the segment is then named "null".
        // Such a table is treated as transactional by default and goes through the
        // CarbonFileInputFormat path. That scenario is handled by the code below.
        seg = new Segment("null", null, readCommittedScope);
        externalTableSegments.add(seg);
    } else {
        LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
        for (LoadMetadataDetails load : loadMetadataDetails) {
            seg = new Segment(load.getLoadName(), null, readCommittedScope);
            if (fileLists != null) {
                for (Object fileList : fileLists) {
                    String timestamp = CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileList.toString());
                    if (timestamp.equals(seg.getSegmentNo())) {
                        externalTableSegments.add(seg);
                        break;
                    }
                }
            } else {
                externalTableSegments.add(seg);
            }
        }
    }
    List<InputSplit> splits = new ArrayList<>();
    boolean useBlockIndex = job.getConfiguration().getBoolean("filter_blocks", true);
    // resolve the filter expression, if one was configured
    if (filter != null) {
        filter.resolve(false);
    }
    if (useBlockIndex) {
        // do block filtering and get split
        splits = getSplits(job, filter, externalTableSegments);
    } else {
        List<CarbonFile> carbonFiles;
        if (null != this.fileLists) {
            carbonFiles = getAllCarbonDataFiles(this.fileLists);
        } else {
            carbonFiles = getAllCarbonDataFiles(carbonTable.getTablePath());
        }
        List<String> allDeleteDeltaFiles = getAllDeleteDeltaFiles(carbonTable.getTablePath());
        for (CarbonFile carbonFile : carbonFiles) {
            // Segment id is set to null because SDK does not write carbondata files with respect
            // to segments. So no specific name is present for this load.
            CarbonInputSplit split = new CarbonInputSplit("null", carbonFile.getAbsolutePath(), 0, carbonFile.getLength(), carbonFile.getLocations(), FileFormat.COLUMNAR_V3);
            split.setVersion(ColumnarFormatVersion.V3);
            BlockletDetailInfo info = new BlockletDetailInfo();
            split.setDetailInfo(info);
            info.setBlockSize(carbonFile.getLength());
            info.setVersionNumber(split.getVersion().number());
            info.setUseMinMaxForPruning(false);
            if (CollectionUtils.isNotEmpty(allDeleteDeltaFiles)) {
                split.setDeleteDeltaFiles(getDeleteDeltaFiles(carbonFile.getAbsolutePath(), allDeleteDeltaFiles));
            }
            splits.add(split);
        }
        splits.sort(Comparator.comparing(o -> ((CarbonInputSplit) o).getFilePath()));
    }
    setAllColumnProjectionIfNotConfigured(job, carbonTable);
    return splits;
}
Also used : Segment(org.apache.carbondata.core.index.Segment) BlockletDetailInfo(org.apache.carbondata.core.indexstore.BlockletDetailInfo) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) ColumnarFormatVersion(org.apache.carbondata.core.metadata.ColumnarFormatVersion) CollectionUtils(org.apache.commons.collections.CollectionUtils) Configuration(org.apache.hadoop.conf.Configuration) LinkedList(java.util.LinkedList) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ReadCommittedScope(org.apache.carbondata.core.readcommitter.ReadCommittedScope) InterfaceAudience(org.apache.carbondata.common.annotations.InterfaceAudience) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) InputSplit(org.apache.hadoop.mapreduce.InputSplit) InterfaceStability(org.apache.carbondata.common.annotations.InterfaceStability) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) IOException(java.io.IOException) File(java.io.File) Serializable(java.io.Serializable) LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope) List(java.util.List) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) FileFormat(org.apache.carbondata.core.statusmanager.FileFormat) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) JobContext(org.apache.hadoop.mapreduce.JobContext) Pattern(java.util.regex.Pattern) IndexFilter(org.apache.carbondata.core.index.IndexFilter) Comparator(java.util.Comparator) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) SchemaReader(org.apache.carbondata.core.metadata.schema.SchemaReader) ArrayUtils(org.apache.commons.lang.ArrayUtils)
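
A hedged driver sketch for getSplits: it wires a CarbonFileInputFormat into a Hadoop Job and asks for splits. The setTablePath helper and the table location are assumptions for illustration; real setup may go through other static setters on CarbonInputFormat depending on version.

import java.util.List;
import org.apache.carbondata.hadoop.api.CarbonFileInputFormat;
import org.apache.carbondata.hadoop.api.CarbonInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;

public class SplitPlanner {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Assumed configuration helper: points the format at the table directory.
        CarbonInputFormat.setTablePath(job.getConfiguration(), "/tmp/store/default/t1");
        CarbonFileInputFormat<Object> format = new CarbonFileInputFormat<>();
        // Runs the pruning shown above and returns one split per block/blocklet.
        List<InputSplit> splits = format.getSplits(job);
        System.out.println("planned splits: " + splits.size());
    }
}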

Example 88 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class LuceneFineGrainIndex method init.

/**
 * It is called to load the index to memory or to initialize it.
 */
public void init(IndexModel indexModel) throws IOException {
    long startTime = System.currentTimeMillis();
    // get this path from file path
    Path indexPath = FileFactory.getPath(indexModel.getFilePath());
    LOGGER.info("Lucene index read path " + indexPath.toString());
    this.filePath = indexPath.getName();
    this.indexSearcherMap = new HashMap<>();
    // get the file system; HDFS is used here, as realized in the Solr project
    CarbonFile indexFilePath = FileFactory.getCarbonFile(indexPath.toString());
    // check that the path is valid
    if (!indexFilePath.exists()) {
        String errorMessage = String.format("index directory %s does not exist.", indexPath);
        LOGGER.error(errorMessage);
        throw new IOException(errorMessage);
    }
    if (!indexFilePath.isDirectory()) {
        String errorMessage = String.format("invalid index path %s, must be a directory", indexPath);
        LOGGER.error(errorMessage);
        throw new IOException(errorMessage);
    }
    if (storeBlockletWise) {
        CarbonFile[] blockletDirs = indexFilePath.listFiles();
        for (CarbonFile blockletDir : blockletDirs) {
            IndexSearcher indexSearcher = createIndexSearcher(new Path(blockletDir.getAbsolutePath()));
            indexSearcherMap.put(blockletDir.getName(), indexSearcher);
        }
    } else {
        IndexSearcher indexSearcher = createIndexSearcher(indexPath);
        indexSearcherMap.put("-1", indexSearcher);
    }
    LOGGER.info("Time taken to initialize lucene searcher: " + (System.currentTimeMillis() - startTime));
}
Also used : Path(org.apache.hadoop.fs.Path) IndexSearcher(org.apache.lucene.search.IndexSearcher) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) IOException(java.io.IOException)
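
The createIndexSearcher helper is not shown above. A rough sketch of what it could look like, assuming Solr's HdfsDirectory is used to expose the HDFS path as a Lucene Directory (as the "realized in the Solr project" comment hints):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.solr.store.hdfs.HdfsDirectory;

// Hypothetical helper mirroring createIndexSearcher(indexPath) above.
static IndexSearcher createIndexSearcher(Path indexPath) throws IOException {
    // Expose the on-HDFS Lucene index as a Lucene Directory and wrap it.
    Directory directory = new HdfsDirectory(indexPath, new Configuration());
    DirectoryReader reader = DirectoryReader.open(directory);
    return new IndexSearcher(reader);
}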

Example 89 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class BloomCoarseGrainIndexFactory method deleteSegmentIndexData.

@Override
public void deleteSegmentIndexData(String segmentId) throws IOException {
    try {
        String indexPath = CarbonTablePath.getIndexesStorePath(getCarbonTable().getTablePath(), segmentId, indexName);
        if (FileFactory.isFileExist(indexPath)) {
            CarbonFile file = FileFactory.getCarbonFile(indexPath);
            CarbonUtil.deleteFoldersAndFilesSilent(file);
        }
        clear(segmentId);
    } catch (InterruptedException ex) {
        throw new IOException("Failed to delete index for segment_" + segmentId);
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) IOException(java.io.IOException)
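
The delete-if-exists idiom above works for any folder in the store. A minimal standalone sketch (the helper name and path are hypothetical):

import java.io.IOException;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.util.CarbonUtil;

static void deleteFolderSilently(String path) throws IOException {
    try {
        if (FileFactory.isFileExist(path)) {
            CarbonFile folder = FileFactory.getCarbonFile(path);
            // Recursively removes the folder and its contents, logging rather
            // than propagating per-file failures.
            CarbonUtil.deleteFoldersAndFilesSilent(folder);
        }
    } catch (InterruptedException ex) {
        throw new IOException("Failed to delete folder " + path, ex);
    }
}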

Example 90 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class CarbonDictionarySortIndexWriterImpl method cleanUpOldSortIndex.

/**
 * Cleans up old, unused sort index files.
 *
 * @param carbonTablePath table path handler used to locate the sort index files
 * @param dictPath path of the column's dictionary file
 */
protected void cleanUpOldSortIndex(CarbonTablePath carbonTablePath, String dictPath) {
    CarbonFile dictFile = FileFactory.getCarbonFile(dictPath, FileFactory.getFileType(dictPath));
    CarbonFile[] files = carbonTablePath.getSortIndexFiles(dictFile.getParentFile(), columnIdentifier.getColumnId());
    int maxTime;
    try {
        maxTime = Integer.parseInt(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.MAX_QUERY_EXECUTION_TIME));
    } catch (NumberFormatException e) {
        maxTime = CarbonCommonConstants.DEFAULT_MAX_QUERY_EXECUTION_TIME;
    }
    if (null != files) {
        Arrays.sort(files, new Comparator<CarbonFile>() {

            @Override
            public int compare(CarbonFile o1, CarbonFile o2) {
                return o1.getName().compareTo(o2.getName());
            }
        });
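        // The last file after sorting by name is skipped below, so one sort
        // index file always remains.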
        for (int i = 0; i < files.length - 1; i++) {
            long difference = System.currentTimeMillis() - files[i].getLastModifiedTime();
            long minutesElapsed = (difference / (1000 * 60));
            if (minutesElapsed > maxTime) {
                if (!files[i].delete()) {
                    LOGGER.warn("Failed to delete sortindex file." + files[i].getAbsolutePath());
                } else {
                    LOGGER.info("Sort index file is deleted." + files[i].getAbsolutePath());
                }
            }
        }
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile)
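
On Java 8+, the anonymous Comparator above collapses to a method reference; a drop-in equivalent for the sort call:

import java.util.Arrays;
import java.util.Comparator;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;

// Equivalent to the anonymous Comparator in cleanUpOldSortIndex.
Arrays.sort(files, Comparator.comparing(CarbonFile::getName));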

Aggregations

CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 161 usages
IOException (java.io.IOException): 47 usages
CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter): 45 usages
ArrayList (java.util.ArrayList): 38 usages
HashMap (java.util.HashMap): 20 usages
FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory): 18 usages
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 18 usages
Path (org.apache.hadoop.fs.Path): 15 usages
List (java.util.List): 11 usages
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 11 usages
Map (java.util.Map): 10 usages
HashSet (java.util.HashSet): 9 usages
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 9 usages
LinkedList (java.util.LinkedList): 6 usages
BlockIndex (org.apache.carbondata.format.BlockIndex): 6 usages
Segment (org.apache.carbondata.core.index.Segment): 5 usages
CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader): 5 usages
Configuration (org.apache.hadoop.conf.Configuration): 5 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 5 usages
Test (org.junit.Test): 5 usages