Search in sources :

Example 1 with SegmentIndexFileStore

use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.

From the class CarbonFileInputFormat, the method getSplits.

/**
 * {@inheritDoc}
 * Reads the FileInputFormat.INPUT_DIR configuration to locate the table
 * path whose data should be split for reading.
 *
 * @param job job context carrying the Hadoop configuration
 * @return list of CarbonInputSplit for the external table segment, or
 *         null when segment validation is disabled or the external
 *         segment directory does not exist
 * @throws IOException if the schema file is missing or corrupt
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    if (null == carbonTable) {
        throw new IOException("Missing/Corrupt schema file for table.");
    }
    if (getValidateSegmentsToAccess(job.getConfiguration())) {
        // Resolve the configured filter expression against this table.
        Expression filterExpression = getFilterPredicates(job.getConfiguration());
        TableProvider provider = new SingleTableProvider(carbonTable);
        // partitionInfo will be null in case of a corrupt schema file.
        PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
        carbonTable.processFilterExpression(filterExpression, null, null);
        FilterResolverIntf resolvedFilter = carbonTable.resolveFilter(filterExpression, provider);
        // External tables keep their data under the "Segment_null" folder.
        String segmentDir = CarbonTablePath.getSegmentPath(identifier.getTablePath(), "null");
        FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
        if (FileFactory.isFileExist(segmentDir, fileType)) {
            // External table segment was found; register it for reading.
            List<Segment> externalTableSegments = new ArrayList<Segment>();
            externalTableSegments.add(new Segment("null", null));
            // At least one index file must be present, otherwise the
            // carbondata files in this segment cannot be read.
            Map<String, String> indexFiles =
                new SegmentIndexFileStore().getIndexFilesFromSegment(segmentDir);
            if (indexFiles.size() == 0) {
                throw new RuntimeException("Index file not present to read the carbondata file");
            }
            // Perform block-level filtering and build the splits.
            return getSplits(job, resolvedFilter, externalTableSegments, null, partitionInfo, null);
        }
    }
    return null;
}
Also used : SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SingleTableProvider(org.apache.carbondata.core.scan.filter.SingleTableProvider) TableProvider(org.apache.carbondata.core.scan.filter.TableProvider) Segment(org.apache.carbondata.core.datamap.Segment) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) SingleTableProvider(org.apache.carbondata.core.scan.filter.SingleTableProvider) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Example 2 with SegmentIndexFileStore

use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.

From the class CarbonIndexFileMergeWriter, the method writeMergeIndexFileBasedOnSegmentFolder.

/**
 * Merges every carbon index file found under the given segment folder into a
 * single merge-index file, then removes the now-redundant individual index
 * files.
 *
 * @param indexFileNamesTobeAdded index file names to include in the merge file
 * @param readFileFooterFromCarbonDataFile true for legacy stores whose index
 *        files lack blocklet info, forcing a read of the carbondata footers
 * @param segmentPath folder of the segment whose index files are merged
 * @param indexFiles the individual index files to delete after merging
 * @return always null
 * @throws IOException if reading the index files or writing the merge file fails
 */
private String writeMergeIndexFileBasedOnSegmentFolder(List<String> indexFileNamesTobeAdded, boolean readFileFooterFromCarbonDataFile, String segmentPath, CarbonFile[] indexFiles) throws IOException {
    SegmentIndexFileStore store = new SegmentIndexFileStore();
    if (readFileFooterFromCarbonDataFile) {
        // Upgrade path: old stores do not carry blocklet info inside the
        // index file, so it must be recovered from the carbondata file footer.
        store.readAllIndexAndFillBolckletInfo(segmentPath);
    } else {
        store.readAllIIndexOfSegment(segmentPath);
    }
    Map<String, byte[]> mergedContent = store.getCarbonIndexMap();
    writeMergeIndexFile(indexFileNamesTobeAdded, segmentPath, mergedContent);
    // The originals are superseded by the merge-index file; drop them.
    for (CarbonFile staleIndexFile : indexFiles) {
        staleIndexFile.delete();
    }
    return null;
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore)

Example 3 with SegmentIndexFileStore

use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.

From the class SegmentFileStore, the method getSchemaFiles.

/**
 * Reads every index file referenced by the given segment file and extracts
 * the column schema recorded in each one.
 *
 * @param segmentFile segment descriptor listing the index files; may be null
 * @param tablePath root path of the table on the file system
 * @return map from index file path to the column schema of its first block;
 *         empty when segmentFile is null
 * @throws IOException if an index file cannot be read or parsed
 */
public static Map<String, List<ColumnSchema>> getSchemaFiles(SegmentFile segmentFile, String tablePath) throws IOException {
    Map<String, List<ColumnSchema>> schemaMap = new HashMap<>();
    if (segmentFile == null) {
        return schemaMap;
    }
    SegmentIndexFileStore store = new SegmentIndexFileStore();
    store.readAllIIndexOfSegment(segmentFile, tablePath, SegmentStatus.SUCCESS, true);
    DataFileFooterConverter footerConverter = new DataFileFooterConverter();
    for (Map.Entry<String, byte[]> indexEntry : store.getCarbonIndexMapWithFullPath().entrySet()) {
        List<DataFileFooter> footers = footerConverter.getIndexInfo(indexEntry.getKey(), indexEntry.getValue());
        // All blocks in one index file share a schema; the first footer suffices.
        if (!footers.isEmpty()) {
            schemaMap.put(indexEntry.getKey(), footers.get(0).getColumnInTable());
        }
    }
    return schemaMap;
}
Also used : DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) HashMap(java.util.HashMap) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with SegmentIndexFileStore

use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.

From the class SegmentFileStore, the method readIndexFiles.

/**
 * Populates {@code indexFilesMap} by reading every index file of this
 * segment and recording, per index file, the data file paths of all blocks
 * it covers. In case {@code ignoreStatus} is true all index files are read
 * regardless of the segment status. No-op when the map was already built.
 *
 * @param status segment status used to select which index files to read
 * @param ignoreStatus when true, read all index files unconditionally
 * @throws IOException if an index file cannot be read or parsed
 */
private void readIndexFiles(SegmentStatus status, boolean ignoreStatus) throws IOException {
    if (indexFilesMap != null) {
        // Already populated; the mapping is built at most once.
        return;
    }
    indexFilesMap = new HashMap<>();
    SegmentIndexFileStore store = new SegmentIndexFileStore();
    store.readAllIIndexOfSegment(this.segmentFile, tablePath, status, ignoreStatus);
    DataFileFooterConverter footerConverter = new DataFileFooterConverter();
    for (Map.Entry<String, byte[]> indexEntry : store.getCarbonIndexMapWithFullPath().entrySet()) {
        List<DataFileFooter> footers = footerConverter.getIndexInfo(indexEntry.getKey(), indexEntry.getValue());
        // Collect the carbondata file path of every block listed in this index file.
        List<String> blockPaths = new ArrayList<>();
        for (DataFileFooter footer : footers) {
            blockPaths.add(footer.getBlockInfo().getTableBlockInfo().getFilePath());
        }
        indexFilesMap.put(indexEntry.getKey(), blockPaths);
    }
}
Also used : DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with SegmentIndexFileStore

use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.

From the class BlockletDataMapIndexStore, the method getAll.

/**
 * Returns the BlockletDataMap for every given identifier, serving from the
 * cache where possible and loading cache misses from their index files. If
 * any load fails, every datamap gathered so far is cleared before the
 * failure is rethrown, so no partially-loaded state is leaked.
 *
 * @param tableSegmentUniqueIdentifiers identifiers of the index files to resolve
 * @return one BlockletDataMap per identifier
 * @throws IOException if loading any segment block fails
 */
@Override
public List<BlockletDataMap> getAll(List<TableBlockIndexUniqueIdentifier> tableSegmentUniqueIdentifiers) throws IOException {
    List<BlockletDataMap> resolved = new ArrayList<>(tableSegmentUniqueIdentifiers.size());
    List<TableBlockIndexUniqueIdentifier> cacheMisses = new ArrayList<>();
    try {
        // First pass: satisfy as many identifiers as possible from the cache.
        for (TableBlockIndexUniqueIdentifier candidate : tableSegmentUniqueIdentifiers) {
            BlockletDataMap cached = getIfPresent(candidate);
            if (cached == null) {
                cacheMisses.add(candidate);
            } else {
                resolved.add(cached);
            }
        }
        // Second pass: load the misses, sharing one index-file store and a
        // set of already-read files to avoid duplicate I/O.
        if (cacheMisses.size() > 0) {
            SegmentIndexFileStore sharedStore = new SegmentIndexFileStore();
            Set<String> filesRead = new HashSet<>();
            for (TableBlockIndexUniqueIdentifier missed : cacheMisses) {
                Map<String, BlockMetaInfo> blockMetaInfoMap = getBlockMetaInfoMap(missed, sharedStore, filesRead);
                resolved.add(loadAndGetDataMap(missed, sharedStore, blockMetaInfoMap));
            }
        }
    } catch (Throwable e) {
        // Roll back: release every datamap gathered before the failure.
        for (BlockletDataMap dataMap : resolved) {
            dataMap.clear();
        }
        throw new IOException("Problem in loading segment blocks.", e);
    }
    return resolved;
}
Also used : SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) ArrayList(java.util.ArrayList) IOException(java.io.IOException) BlockletDataMap(org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap) HashSet(java.util.HashSet)

Aggregations

SegmentIndexFileStore (org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore)8 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)3 Map (java.util.Map)3 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)3 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 BlockletDataMap (org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap)2 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)2 DataFileFooterConverter (org.apache.carbondata.core.util.DataFileFooterConverter)2 List (java.util.List)1 Segment (org.apache.carbondata.core.datamap.Segment)1 CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)1 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)1 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)1 MemoryException (org.apache.carbondata.core.memory.MemoryException)1 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)1 PartitionInfo (org.apache.carbondata.core.metadata.schema.PartitionInfo)1 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)1 Expression (org.apache.carbondata.core.scan.expression.Expression)1