Search in sources :

Example 1 with LatestFilesReadCommittedScope

Use of org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope in the Apache CarbonData project.

From the class CarbonFileInputFormat, method getSplits.

/**
 * Computes the {@link InputSplit}s (concretely {@link CarbonInputSplit}s) for the job,
 * mapping carbondata blocks/blocklets to splits.
 *
 * @param job JobContext carrying the Hadoop Configuration
 * @return list of CarbonInputSplit for this table
 * @throws IOException if the table schema is missing or corrupt
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    if (carbonTable == null) {
        throw new IOException("Missing/Corrupt schema file for table.");
    }
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    // Decide which read-committed scope governs segment visibility.
    ReadCommittedScope scope;
    if (carbonTable.isTransactionalTable()) {
        // SDK sometimes writes directly into the segment directory
        // ("Fact/Part0/Segment_null") rather than the table path, so point the
        // scope at that fixed location for the default-transactional case.
        scope = new LatestFilesReadCommittedScope(
            identifier.getTablePath() + "/Fact/Part0/Segment_null/", job.getConfiguration());
    } else {
        scope = getReadCommittedScope(job.getConfiguration());
        if (scope == null) {
            scope = new LatestFilesReadCommittedScope(identifier.getTablePath(), job.getConfiguration());
        } else {
            scope.setConfiguration(job.getConfiguration());
        }
    }
    // May be null when no filter expression was configured.
    IndexFilter filter = getFilterPredicates(job.getConfiguration());
    // Collect the segments to scan.
    List<Segment> externalTableSegments = new ArrayList<>();
    if (carbonTable.isTransactionalTable()) {
        // The SDK-written data under "Fact/Part0/Segment_null" is represented by a
        // single segment literally named "null"; this table still goes through
        // CarbonFileInputFormat even though it is flagged transactional by default.
        externalTableSegments.add(new Segment("null", null, scope));
    } else {
        for (LoadMetadataDetails load : scope.getSegmentList()) {
            Segment candidate = new Segment(load.getLoadName(), null, scope);
            if (fileLists == null) {
                // No explicit file list configured: take every known segment.
                externalTableSegments.add(candidate);
                continue;
            }
            // Only keep segments whose number matches the timestamp embedded in
            // one of the explicitly requested files.
            for (Object requestedFile : fileLists) {
                String timestamp =
                    CarbonTablePath.DataFileUtil.getTimeStampFromFileName(requestedFile.toString());
                if (timestamp.equals(candidate.getSegmentNo())) {
                    externalTableSegments.add(candidate);
                    break;
                }
            }
        }
    }
    if (filter != null) {
        filter.resolve(false);
    }
    boolean useBlockIndex = job.getConfiguration().getBoolean("filter_blocks", true);
    List<InputSplit> splits;
    if (useBlockIndex) {
        // Prune via the block index, then build splits from the survivors.
        splits = getSplits(job, filter, externalTableSegments);
    } else {
        // Index pruning disabled: one split per carbondata file.
        splits = new ArrayList<>();
        List<CarbonFile> carbonFiles = (this.fileLists != null)
            ? getAllCarbonDataFiles(this.fileLists)
            : getAllCarbonDataFiles(carbonTable.getTablePath());
        List<String> allDeleteDeltaFiles = getAllDeleteDeltaFiles(carbonTable.getTablePath());
        for (CarbonFile dataFile : carbonFiles) {
            // Segment id is "null": SDK writes carbondata files without segment
            // organization, so no real load name exists for these files.
            CarbonInputSplit split = new CarbonInputSplit("null", dataFile.getAbsolutePath(), 0,
                dataFile.getLength(), dataFile.getLocations(), FileFormat.COLUMNAR_V3);
            split.setVersion(ColumnarFormatVersion.V3);
            BlockletDetailInfo detailInfo = new BlockletDetailInfo();
            split.setDetailInfo(detailInfo);
            detailInfo.setBlockSize(dataFile.getLength());
            detailInfo.setVersionNumber(split.getVersion().number());
            detailInfo.setUseMinMaxForPruning(false);
            if (CollectionUtils.isNotEmpty(allDeleteDeltaFiles)) {
                split.setDeleteDeltaFiles(
                    getDeleteDeltaFiles(dataFile.getAbsolutePath(), allDeleteDeltaFiles));
            }
            splits.add(split);
        }
        // Deterministic ordering by file path.
        splits.sort(Comparator.comparing(s -> ((CarbonInputSplit) s).getFilePath()));
    }
    setAllColumnProjectionIfNotConfigured(job, carbonTable);
    return splits;
}
Also used : Segment(org.apache.carbondata.core.index.Segment) BlockletDetailInfo(org.apache.carbondata.core.indexstore.BlockletDetailInfo) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) ColumnarFormatVersion(org.apache.carbondata.core.metadata.ColumnarFormatVersion) CollectionUtils(org.apache.commons.collections.CollectionUtils) Configuration(org.apache.hadoop.conf.Configuration) LinkedList(java.util.LinkedList) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ReadCommittedScope(org.apache.carbondata.core.readcommitter.ReadCommittedScope) InterfaceAudience(org.apache.carbondata.common.annotations.InterfaceAudience) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) InputSplit(org.apache.hadoop.mapreduce.InputSplit) InterfaceStability(org.apache.carbondata.common.annotations.InterfaceStability) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) IOException(java.io.IOException) File(java.io.File) Serializable(java.io.Serializable) LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope) List(java.util.List) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) FileFormat(org.apache.carbondata.core.statusmanager.FileFormat) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) JobContext(org.apache.hadoop.mapreduce.JobContext) Pattern(java.util.regex.Pattern) IndexFilter(org.apache.carbondata.core.index.IndexFilter) Comparator(java.util.Comparator) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) SchemaReader(org.apache.carbondata.core.metadata.schema.SchemaReader) 
ArrayUtils(org.apache.commons.lang.ArrayUtils) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) Segment(org.apache.carbondata.core.index.Segment) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ReadCommittedScope(org.apache.carbondata.core.readcommitter.ReadCommittedScope) LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) BlockletDetailInfo(org.apache.carbondata.core.indexstore.BlockletDetailInfo) IndexFilter(org.apache.carbondata.core.index.IndexFilter) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit)

Example 2 with LatestFilesReadCommittedScope

use of org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope in project carbondata by apache.

From the class IndexUtil, method executeClearIndexJob.

/**
 * This method gets the indexJob and call execute , this job will be launched before clearing
 * indexes from driver side during drop table and drop index and clears the index in executor
 * side
 * @param carbonTable
 * @throws IOException
 */
/**
 * Builds an IndexInputFormat describing the segments whose indexes should be cleared
 * and hands it to the given IndexJob. Invoked from the driver before clearing indexes
 * during drop table / drop index, so the executor-side index caches are cleared too.
 *
 * @param indexJob     job used to broadcast the clear request to executors
 * @param carbonTable  table whose indexes are being cleared
 * @param indexToClear name of the specific index to clear (or empty for all)
 * @throws IOException declared for segment-listing failures
 */
private static void executeClearIndexJob(IndexJob indexJob, CarbonTable carbonTable, String indexToClear) throws IOException {
    IndexInputFormat clearRequest;
    if (carbonTable.isTransactionalTable()) {
        // Transactional table: segment validity comes from the table status manager.
        SegmentStatusManager.ValidAndInvalidSegmentsInfo segmentsInfo =
            getValidAndInvalidSegments(carbonTable, FileFactory.getConfiguration());
        List<String> invalidSegmentIds = new ArrayList<>();
        for (Segment invalid : segmentsInfo.getInvalidSegments()) {
            invalidSegmentIds.add(invalid.getSegmentNo());
        }
        clearRequest = new IndexInputFormat(carbonTable, segmentsInfo.getValidSegments(), invalidSegmentIds, true, indexToClear);
    } else {
        // Non-transactional table: derive the segment list from the latest files on disk.
        ReadCommittedScope scope = new LatestFilesReadCommittedScope(carbonTable.getTablePath(), FileFactory.getConfiguration());
        LoadMetadataDetails[] loads = scope.getSegmentList();
        List<Segment> validSegments = new ArrayList<>(loads.length);
        for (LoadMetadataDetails load : loads) {
            Segment seg = new Segment(load.getLoadName(), load.getSegmentFile());
            seg.setLoadMetadataDetails(load);
            validSegments.add(seg);
        }
        clearRequest = new IndexInputFormat(carbonTable, validSegments, new ArrayList<>(0), true, indexToClear);
    }
    try {
        indexJob.execute(clearRequest, null);
    } catch (Exception e) {
        // Deliberately swallowed: if this is reached from drop table and the index
        // server has crashed, the drop must still complete, so only log the failure.
        LOGGER.error("Failed to execute Index clear Job", e);
    }
}
Also used : LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) ReadCommittedScope(org.apache.carbondata.core.readcommitter.ReadCommittedScope) LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope)

Example 3 with LatestFilesReadCommittedScope

use of org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope in project carbondata by apache.

From the class CarbonTableInputFormat, method getReadCommitted.

/**
 * Returns the ReadCommittedScope for this input format, creating and caching it on
 * first call. Transactional tables (per CARBON_TRANSACTIONAL_TABLE, default true)
 * read from the table status file; otherwise any configured scope is used, falling
 * back to a latest-files scope over the table path.
 *
 * @param job        JobContext carrying the Hadoop Configuration
 * @param identifier table identifier supplying the table path
 * @return the cached or newly created scope
 * @throws IOException if scope construction fails
 */
public ReadCommittedScope getReadCommitted(JobContext job, AbsoluteTableIdentifier identifier) throws IOException {
    // Reuse the scope created on a previous call.
    if (readCommittedScope != null) {
        return readCommittedScope;
    }
    ReadCommittedScope scope;
    if (job.getConfiguration().getBoolean(CARBON_TRANSACTIONAL_TABLE, true)) {
        scope = new TableStatusReadCommittedScope(identifier, job.getConfiguration());
    } else {
        scope = getReadCommittedScope(job.getConfiguration());
        if (scope == null) {
            scope = new LatestFilesReadCommittedScope(identifier.getTablePath(), job.getConfiguration());
        }
    }
    this.readCommittedScope = scope;
    return readCommittedScope;
}
Also used : ReadCommittedScope(org.apache.carbondata.core.readcommitter.ReadCommittedScope) TableStatusReadCommittedScope(org.apache.carbondata.core.readcommitter.TableStatusReadCommittedScope) LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope) LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope) TableStatusReadCommittedScope(org.apache.carbondata.core.readcommitter.TableStatusReadCommittedScope)

Aggregations

LatestFilesReadCommittedScope (org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope)3 ReadCommittedScope (org.apache.carbondata.core.readcommitter.ReadCommittedScope)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)2 File (java.io.File)1 Serializable (java.io.Serializable)1 Comparator (java.util.Comparator)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 Pattern (java.util.regex.Pattern)1 InterfaceAudience (org.apache.carbondata.common.annotations.InterfaceAudience)1 InterfaceStability (org.apache.carbondata.common.annotations.InterfaceStability)1 CarbonCommonConstants (org.apache.carbondata.core.constants.CarbonCommonConstants)1 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)1 CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)1 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)1 IndexFilter (org.apache.carbondata.core.index.IndexFilter)1 Segment (org.apache.carbondata.core.index.Segment)1 BlockletDetailInfo (org.apache.carbondata.core.indexstore.BlockletDetailInfo)1