
Example 21 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class CarbonDataMergerUtil method getSegListIUDCompactionQualified.

/**
 * Gets the list of segments that qualify for IUD compaction.
 * @param segments segments to check against the delete-delta file threshold
 * @param segmentUpdateStatusManager status manager used to inspect delete-delta files
 * @return segment number and block entries that qualify for IUD compaction
 */
public static List<String> getSegListIUDCompactionQualified(List<Segment> segments, SegmentUpdateStatusManager segmentUpdateStatusManager) throws IOException {
    List<String> validSegments = new ArrayList<>();
    int numberDeleteDeltaFilesThreshold = CarbonProperties.getInstance().getNoDeleteDeltaFilesThresholdForIUDCompaction();
    for (Segment seg : segments) {
        List<String> segmentNoAndBlocks = checkDeleteDeltaFilesInSeg(seg, segmentUpdateStatusManager, numberDeleteDeltaFilesThreshold);
        validSegments.addAll(segmentNoAndBlocks);
    }
    return validSegments;
}
Also used : ArrayList(java.util.ArrayList) Segment(org.apache.carbondata.core.index.Segment)
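
A minimal usage sketch, assuming only the utility method shown above; the caller obtains the segment list and update status manager elsewhere, and the method name below is hypothetical:

import java.io.IOException;
import java.util.List;
import org.apache.carbondata.core.index.Segment;
import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager;

// CarbonDataMergerUtil's own package is not shown in this listing; import it
// from wherever the project defines it.
static void printQualifiedSegments(List<Segment> candidateSegments,
        SegmentUpdateStatusManager updateStatusManager) throws IOException {
    // Delegates the delete-delta threshold check to the utility shown above.
    List<String> qualified = CarbonDataMergerUtil
            .getSegListIUDCompactionQualified(candidateSegments, updateStatusManager);
    for (String entry : qualified) {
        System.out.println("Qualified for IUD compaction: " + entry);
    }
}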

Example 22 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class TestBlockletIndexFactory method getValidDistributables.

@Test
public void getValidDistributables() throws IOException {
    BlockletIndexInputSplit blockletIndexInputSplit = new BlockletIndexInputSplit("/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756709.carbonindex");
    Segment segment = new Segment("0", null, new TableStatusReadCommittedScope(carbonTable.getAbsoluteTableIdentifier(), new Configuration(false)));
    blockletIndexInputSplit.setSegment(segment);
    BlockletIndexInputSplit indexInputSplit = new BlockletIndexInputSplit("/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756701.carbonindex");
    indexInputSplit.setSegment(segment);
    List<IndexInputSplit> indexInputSplits = new ArrayList<>(2);
    indexInputSplits.add(blockletIndexInputSplit);
    indexInputSplits.add(indexInputSplit);
    new MockUp<BlockletIndexFactory>() {
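        // JMockit fake: overrides the identifier lookup so the test never lists
        // real index files on disk. `tableBlockIndexUniqueIdentifier` used below
        // is a field of the test class, defined outside this snippet.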

        @Mock
        Set<TableBlockIndexUniqueIdentifier> getTableBlockIndexUniqueIdentifiers(Segment segment) {
            TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier1 = new TableBlockIndexUniqueIdentifier("/opt/store/default/carbon_table/Fact/Part0/Segment_0", "0_batchno0-0-1521012756701.carbonindex", null, "0");
            Set<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = new HashSet<>(3);
            tableBlockIndexUniqueIdentifiers.add(tableBlockIndexUniqueIdentifier);
            tableBlockIndexUniqueIdentifiers.add(tableBlockIndexUniqueIdentifier1);
            return tableBlockIndexUniqueIdentifiers;
        }
    };
    List<IndexInputSplit> validDistributables = blockletIndexFactory.getAllUncached(indexInputSplits);
    assert 1 == validDistributables.size();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) ArrayList(java.util.ArrayList) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) TableStatusReadCommittedScope(org.apache.carbondata.core.readcommitter.TableStatusReadCommittedScope) MockUp(mockit.MockUp) Segment(org.apache.carbondata.core.index.Segment) HashSet(java.util.HashSet) Test(org.junit.Test)
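
A compact sketch of the setup this test relies on; `identifier` is a placeholder, and only constructors that appear in the test above are used:

import java.io.IOException;
import org.apache.carbondata.core.index.Segment;
import org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.readcommitter.TableStatusReadCommittedScope;
import org.apache.hadoop.conf.Configuration;

// Builds an input split for segment "0" bound to a read-committed scope.
static BlockletIndexInputSplit splitForSegmentZero(AbsoluteTableIdentifier identifier)
        throws IOException {
    Segment segment = new Segment("0", null,
            new TableStatusReadCommittedScope(identifier, new Configuration(false)));
    BlockletIndexInputSplit split = new BlockletIndexInputSplit(
            "/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756709.carbonindex");
    split.setSegment(segment);
    return split;
}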

Example 23 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class BlockletIndexInputFormat method createRecordReader.

@Override
public RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
    return new RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema>() {

        private BlockletIndexWrapper wrapper = null;

        private TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier = null;

        private TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper;

        Cache<TableBlockIndexUniqueIdentifierWrapper, BlockletIndexWrapper> cache = CacheProvider.getInstance().createCache(CacheType.DRIVER_BLOCKLET_INDEX);

        private Iterator<TableBlockIndexUniqueIdentifier> iterator;

        // Cache to avoid multiple times listing of files
        private Map<String, Map<String, BlockMetaInfo>> segInfoCache = new HashMap<>();

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            BlockletIndexInputSplit segmentDistributable = (BlockletIndexInputSplit) inputSplit;
            TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier = segmentDistributable.getTableBlockIndexUniqueIdentifier();
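            // Resolve the split's segment id into a Segment bound to the input
            // format's read-committed scope before listing its index files.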
            Segment segment = Segment.toSegment(tableSegmentUniqueIdentifier.getSegmentId(), readCommittedScope);
            iterator = BlockletIndexUtil.getTableBlockUniqueIdentifiers(segment).iterator();
        }

        @Override
        public boolean nextKeyValue() {
            if (iterator.hasNext()) {
                TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier = iterator.next();
                this.tableBlockIndexUniqueIdentifier = tableBlockIndexUniqueIdentifier;
                TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper = new TableBlockIndexUniqueIdentifierWrapper(tableBlockIndexUniqueIdentifier, table, false, true, true);
                this.tableBlockIndexUniqueIdentifierWrapper = tableBlockIndexUniqueIdentifierWrapper;
                wrapper = ((BlockletIndexStore) cache).get(tableBlockIndexUniqueIdentifierWrapper, segInfoCache);
                return true;
            }
            return false;
        }

        @Override
        public TableBlockIndexUniqueIdentifier getCurrentKey() {
            return tableBlockIndexUniqueIdentifier;
        }

        @Override
        public BlockletIndexDetailsWithSchema getCurrentValue() {
            return new BlockletIndexDetailsWithSchema(wrapper, table.getTableInfo().isSchemaModified());
        }

        @Override
        public float getProgress() {
            return 0;
        }

        @Override
        public void close() {
            if (null != tableBlockIndexUniqueIdentifierWrapper) {
                if (null != wrapper && null != wrapper.getIndexes() && !wrapper.getIndexes().isEmpty()) {
                    String segmentId = tableBlockIndexUniqueIdentifierWrapper.getTableBlockIndexUniqueIdentifier().getSegmentId();
                    // segmentId is the same for all the indexes and the segmentProperties
                    // cache is maintained at segment level, so invalidate needs to be called only once
                    SegmentPropertiesAndSchemaHolder.getInstance().invalidate(segmentId, wrapper.getIndexes().get(0).getSegmentPropertiesWrapper(), tableBlockIndexUniqueIdentifierWrapper.isAddTableBlockToUnsafeAndLRUCache());
                }
            }
        }
    };
}
Also used : RecordReader(org.apache.hadoop.mapreduce.RecordReader) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Segment(org.apache.carbondata.core.index.Segment) TableBlockIndexUniqueIdentifierWrapper(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper) BlockletIndexInputSplit(org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit) Iterator(java.util.Iterator) HashMap(java.util.HashMap) Map(java.util.Map) IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) InputSplit(org.apache.hadoop.mapreduce.InputSplit) BlockletIndexInputSplit(org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit) BlockletIndexWrapper(org.apache.carbondata.core.indexstore.BlockletIndexWrapper) Cache(org.apache.carbondata.core.cache.Cache) BlockMetaInfo(org.apache.carbondata.core.indexstore.BlockMetaInfo)
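
A minimal consumer sketch following the standard Hadoop RecordReader contract; `format`, `split`, and `context` are supplied by the caller, and the imports for BlockletIndexInputFormat and BlockletIndexDetailsWithSchema are omitted because their packages are not shown in this listing:

import java.io.IOException;
import org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

static void readAllIndexEntries(BlockletIndexInputFormat format, InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema> reader =
            format.createRecordReader(split, context);
    try {
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            // Each key identifies one index file of the segment.
            System.out.println("segment " + reader.getCurrentKey().getSegmentId());
            // The value bundles the blocklet index wrapper with the schema-modified flag.
            BlockletIndexDetailsWithSchema details = reader.getCurrentValue();
        }
    } finally {
        // close() also invalidates the per-segment properties cache, as shown above.
        reader.close();
    }
}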

Example 24 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class SegmentFileStore method writeSegmentFileForOthers.

public static boolean writeSegmentFileForOthers(CarbonTable carbonTable, Segment segment, PartitionSpec partitionSpec, List<FileStatus> partitionDataFiles) throws IOException {
    String tablePath = carbonTable.getTablePath();
    CarbonFile[] dataFiles = null;
    if (partitionDataFiles.isEmpty()) {
        CarbonFile segmentFolder = FileFactory.getCarbonFile(segment.getSegmentPath());
        dataFiles = segmentFolder.listFiles(file -> (!file.getName().equals("_SUCCESS") && !file.getName().endsWith(".crc")));
    } else {
        dataFiles = partitionDataFiles.stream().map(fileStatus -> FileFactory.getCarbonFile(fileStatus.getPath().toString())).toArray(CarbonFile[]::new);
    }
    if (dataFiles != null && dataFiles.length > 0) {
        SegmentFile segmentFile = new SegmentFile();
        segmentFile.setOptions(segment.getOptions());
        FolderDetails folderDetails = new FolderDetails();
        folderDetails.setStatus(SegmentStatus.SUCCESS.getMessage());
        folderDetails.setRelative(false);
        if (!partitionDataFiles.isEmpty()) {
            folderDetails.setPartitions(partitionSpec.getPartitions());
            segmentFile.addPath(partitionSpec.getLocation().toString(), folderDetails);
        } else {
            segmentFile.addPath(segment.getSegmentPath(), folderDetails);
        }
        for (CarbonFile file : dataFiles) {
            folderDetails.getFiles().add(file.getName());
        }
        String segmentFileFolder = CarbonTablePath.getSegmentFilesLocation(tablePath);
        CarbonFile carbonFile = FileFactory.getCarbonFile(segmentFileFolder);
        if (!carbonFile.exists()) {
            carbonFile.mkdirs();
        }
        // write segment info to new file.
        writeSegmentFile(segmentFile, segmentFileFolder + File.separator + segment.getSegmentFileName());
        return true;
    }
    return false;
}
Also used : Arrays(java.util.Arrays) AtomicFileOperationFactory(org.apache.carbondata.core.fileoperations.AtomicFileOperationFactory) FileStatus(org.apache.hadoop.fs.FileStatus) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) Logger(org.apache.log4j.Logger) DataOutputStream(java.io.DataOutputStream) SegmentColumnMetaDataInfo(org.apache.carbondata.core.segmentmeta.SegmentColumnMetaDataInfo) Gson(com.google.gson.Gson) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) Set(java.util.Set) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) Collectors(java.util.stream.Collectors) CarbonLockUtil(org.apache.carbondata.core.locks.CarbonLockUtil) Serializable(java.io.Serializable) Objects(java.util.Objects) List(java.util.List) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) CarbonUtil(org.apache.carbondata.core.util.CarbonUtil) DataInputStream(java.io.DataInputStream) TrashUtil(org.apache.carbondata.core.util.TrashUtil) Segment(org.apache.carbondata.core.index.Segment) HashMap(java.util.HashMap) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) SegmentStatus(org.apache.carbondata.core.statusmanager.SegmentStatus) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ObjectSerializationUtil(org.apache.carbondata.core.util.ObjectSerializationUtil) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SegmentMetaDataInfo(org.apache.carbondata.core.segmentmeta.SegmentMetaDataInfo) Charset(java.nio.charset.Charset) FileWriteOperation(org.apache.carbondata.core.fileoperations.FileWriteOperation) OutputStreamWriter(java.io.OutputStreamWriter) CarbonUpdateUtil(org.apache.carbondata.core.mutate.CarbonUpdateUtil) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) IndexStoreManager(org.apache.carbondata.core.index.IndexStoreManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) LinkedHashSet(java.util.LinkedHashSet) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) BufferedWriter(java.io.BufferedWriter) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) File(java.io.File) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) SegmentMetaDataInfoStats(org.apache.carbondata.core.segmentmeta.SegmentMetaDataInfoStats) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) AtomicFileOperations(org.apache.carbondata.core.fileoperations.AtomicFileOperations) TableIndex(org.apache.carbondata.core.index.TableIndex) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile)
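
A sketch of the non-partitioned call path; the helper name is hypothetical. Because the file list is empty, the method scans segment.getSegmentPath() itself and never dereferences the PartitionSpec argument (see the branch above), so passing null is safe under that assumption:

import java.io.IOException;
import java.util.Collections;
import org.apache.carbondata.core.index.Segment;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;

// Returns true when a segment file was written, false for an empty segment folder.
static boolean writeSegmentFileFromSegmentFolder(CarbonTable table, Segment segment)
        throws IOException {
    return SegmentFileStore.writeSegmentFileForOthers(
            table, segment, null, Collections.emptyList());
}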

Example 25 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class SegmentStatusManager method getValidAndInvalidSegments.

/**
 * Gets the valid and invalid segments for the given load status details.
 */
public ValidAndInvalidSegmentsInfo getValidAndInvalidSegments(Boolean isChildTable, LoadMetadataDetails[] loadMetadataDetails, ReadCommittedScope readCommittedScope) throws IOException {
    // @TODO: move reading LoadStatus file to separate class
    List<Segment> listOfValidSegments = new ArrayList<>(10);
    List<Segment> listOfValidUpdatedSegments = new ArrayList<>(10);
    List<Segment> listOfInvalidSegments = new ArrayList<>(10);
    List<Segment> listOfStreamSegments = new ArrayList<>(10);
    List<Segment> listOfInProgressSegments = new ArrayList<>(10);
    Map<String, List<String>> mergedLoadMapping = new HashMap<>();
    try {
        if (loadMetadataDetails == null) {
            loadMetadataDetails = readTableStatusFile(CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
        }
        if (readCommittedScope == null) {
            readCommittedScope = new TableStatusReadCommittedScope(identifier, loadMetadataDetails, configuration);
        }
        // iterate the array directly
        for (LoadMetadataDetails segment : loadMetadataDetails) {
            if (SegmentStatus.SUCCESS == segment.getSegmentStatus() || SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus() || SegmentStatus.LOAD_PARTIAL_SUCCESS == segment.getSegmentStatus() || SegmentStatus.STREAMING == segment.getSegmentStatus() || SegmentStatus.STREAMING_FINISH == segment.getSegmentStatus()) {
                // check for merged loads.
                if (null != segment.getMergedLoadName()) {
                    Segment seg = new Segment(segment.getMergedLoadName(), segment.getSegmentFile(), readCommittedScope, segment);
                    if (!listOfValidSegments.contains(seg)) {
                        listOfValidSegments.add(seg);
                    }
                    // if merged load is updated then put it in updated list
                    if (SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()) {
                        listOfValidUpdatedSegments.add(seg);
                    }
                    continue;
                }
                if (SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()) {
                    listOfValidUpdatedSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile(), readCommittedScope));
                }
                if (SegmentStatus.STREAMING == segment.getSegmentStatus() || SegmentStatus.STREAMING_FINISH == segment.getSegmentStatus()) {
                    listOfStreamSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile(), readCommittedScope));
                    continue;
                }
                // for a child table, add to the validSegment list only when the segment is
                // non-empty; otherwise skip it, as the segment's data does not exist
                if (isChildTable) {
                    if (!segment.getDataSize().equalsIgnoreCase("0") && !segment.getIndexSize().equalsIgnoreCase("0")) {
                        listOfValidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile(), readCommittedScope, segment));
                    }
                } else {
                    listOfValidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile(), readCommittedScope, segment));
                }
            } else if ((SegmentStatus.LOAD_FAILURE == segment.getSegmentStatus() || SegmentStatus.COMPACTED == segment.getSegmentStatus() || SegmentStatus.MARKED_FOR_DELETE == segment.getSegmentStatus())) {
                listOfInvalidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
                if (SegmentStatus.COMPACTED == segment.getSegmentStatus()) {
                    // check the main table's merged segment's map. ex: {0.1 -> 0,1,2,3}
                    if (null != segment.getMergedLoadName()) {
                        if (mergedLoadMapping.containsKey(segment.getMergedLoadName())) {
                            mergedLoadMapping.get(segment.getMergedLoadName()).add(segment.getLoadName());
                        } else {
                            List<String> mergedLoads = new ArrayList<>();
                            mergedLoads.add(segment.getLoadName());
                            mergedLoadMapping.put(segment.getMergedLoadName(), mergedLoads);
                        }
                    }
                }
            } else if (SegmentStatus.INSERT_IN_PROGRESS == segment.getSegmentStatus() || SegmentStatus.INSERT_OVERWRITE_IN_PROGRESS == segment.getSegmentStatus()) {
                listOfInProgressSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile(), readCommittedScope));
            }
        }
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        throw e;
    }
    return new ValidAndInvalidSegmentsInfo(listOfValidSegments, listOfValidUpdatedSegments, listOfInvalidSegments, listOfStreamSegments, listOfInProgressSegments, mergedLoadMapping);
}
Also used : TableStatusReadCommittedScope(org.apache.carbondata.core.readcommitter.TableStatusReadCommittedScope) IOException(java.io.IOException) Segment(org.apache.carbondata.core.index.Segment)
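
A hedged invocation sketch; `statusManager` is a SegmentStatusManager built for the table elsewhere, ValidAndInvalidSegmentsInfo is written as the nested type the method above appears to return, and the getValidSegments() and getSegmentNo() accessor names are assumptions to verify against the source:

import java.io.IOException;
import org.apache.carbondata.core.index.Segment;
import org.apache.carbondata.core.statusmanager.SegmentStatusManager;

static void listValidSegments(SegmentStatusManager statusManager) throws IOException {
    // Nulls let the method read the table status file and build its own
    // read-committed scope, per the null checks in the listing above.
    SegmentStatusManager.ValidAndInvalidSegmentsInfo info =
            statusManager.getValidAndInvalidSegments(false, null, null);
    for (Segment segment : info.getValidSegments()) {
        System.out.println("valid segment: " + segment.getSegmentNo());
    }
}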

Aggregations

Segment (org.apache.carbondata.core.index.Segment): 35
ArrayList (java.util.ArrayList): 24
IOException (java.io.IOException): 18
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 14
SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager): 11
HashMap (java.util.HashMap): 10
List (java.util.List): 9
Map (java.util.Map): 8
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 8
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 8
SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager): 8
CarbonCommonConstants (org.apache.carbondata.core.constants.CarbonCommonConstants): 7
HashSet (java.util.HashSet): 6
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 6
FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory): 6
TableIndex (org.apache.carbondata.core.index.TableIndex): 6
Collectors (java.util.stream.Collectors): 5
LogServiceFactory (org.apache.carbondata.common.logging.LogServiceFactory): 5
IndexFilter (org.apache.carbondata.core.index.IndexFilter): 5
PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec): 5