Search in sources:

Example 6 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

From the class CarbonTableInputFormat, method getSplitsOfStreaming.

/**
 * Uses the file list recorded in each streaming segment's .carbonindex file to
 * build the input splits for streaming data.
 *
 * For every block listed in the index, the file is cut into splits following
 * the standard Hadoop FileInputFormat scheme (min/max split size, HDFS block
 * locations), and each split is tagged with FileFormat.ROW_V1.
 *
 * @param job            the MapReduce job context (supplies min/max split size config)
 * @param identifier     table identifier used to resolve the segment directory
 * @param streamSegments streaming segments to scan; may be null or empty
 * @return the list of splits; empty if there are no streaming segments
 * @throws IOException if the index file or the data files cannot be read
 */
public List<InputSplit> getSplitsOfStreaming(JobContext job, AbsoluteTableIdentifier identifier, List<Segment> streamSegments) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (streamSegments != null && !streamSegments.isEmpty()) {
        numStreamSegments = streamSegments.size();
        // Split-size bounds follow the usual Hadoop FileInputFormat convention.
        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
        long maxSize = getMaxSplitSize(job);
        for (Segment segment : streamSegments) {
            String segmentDir = CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo());
            FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
            if (FileFactory.isFileExist(segmentDir, fileType)) {
                String indexName = CarbonTablePath.getCarbonStreamIndexFileName();
                String indexPath = segmentDir + File.separator + indexName;
                CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
                // index file exists
                if (index.exists()) {
                    // data file exists
                    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
                    try {
                        // map block index: iterate every block recorded in the index file
                        indexReader.openThriftReader(indexPath);
                        while (indexReader.hasNext()) {
                            BlockIndex blockIndex = indexReader.readBlockIndexInfo();
                            String filePath = segmentDir + File.separator + blockIndex.getFile_name();
                            Path path = new Path(filePath);
                            long length = blockIndex.getFile_size();
                            if (length != 0) {
                                BlockLocation[] blkLocations;
                                FileSystem fs = FileFactory.getFileSystem(path);
                                FileStatus file = fs.getFileStatus(path);
                                blkLocations = fs.getFileBlockLocations(path, 0, length);
                                long blockSize = file.getBlockSize();
                                long splitSize = computeSplitSize(blockSize, minSize, maxSize);
                                long bytesRemaining = length;
                                // 1.1 is the classic FileInputFormat SPLIT_SLOP factor: the last
                                // chunk may be up to 10% larger than splitSize to avoid a tiny tail split.
                                while (((double) bytesRemaining) / splitSize > 1.1) {
                                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                                    splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
                                    bytesRemaining -= splitSize;
                                }
                                // Emit the (possibly oversized) final split covering the remainder.
                                if (bytesRemaining != 0) {
                                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                                    splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining, bytesRemaining, blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
                                }
                            } else {
                                // Create empty hosts array for zero length files
                                splits.add(makeSplit(segment.getSegmentNo(), path, 0, length, new String[0], FileFormat.ROW_V1));
                            }
                        }
                    } finally {
                        // Always release the thrift reader, even if reading a block entry failed.
                        indexReader.closeThriftReader();
                    }
                }
            }
        }
    }
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) FileStatus(org.apache.hadoop.fs.FileStatus) CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) ArrayList(java.util.ArrayList) BlockLocation(org.apache.hadoop.fs.BlockLocation) BlockIndex(org.apache.carbondata.format.BlockIndex) Segment(org.apache.carbondata.core.datamap.Segment) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) FileSystem(org.apache.hadoop.fs.FileSystem) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit)

Example 7 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

From the class CarbonInputSplit, method setInvalidSegments.

/**
 * Records the ids of the given invalid segments on this split.
 *
 * @param invalidSegments segments whose ids should be marked as invalid
 */
public void setInvalidSegments(List<Segment> invalidSegments) {
    List<String> segmentIds = new ArrayList<>(invalidSegments.size());
    for (Segment invalid : invalidSegments) {
        segmentIds.add(invalid.getSegmentNo());
    }
    this.invalidSegments = segmentIds;
}
Also used : ArrayList(java.util.ArrayList) Segment(org.apache.carbondata.core.datamap.Segment)

Example 8 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

From the class DataMapWriterListener, method register.

/**
 * Register a DataMapWriter for the given segment.
 *
 * Creates a writer via the factory and files it in the registry under the
 * data map's indexed columns, so later events can be fanned out to it.
 *
 * @param factory       factory used to create the writer; must not be null
 * @param segmentId     id of the segment the writer will write to; must not be null
 * @param dataWritePath path the created writer should write data map output to
 */
private void register(DataMapFactory factory, String segmentId, String dataWritePath) {
    // NOTE: asserts are no-ops unless the JVM runs with -ea; callers are
    // expected never to pass null here.
    assert (factory != null);
    assert (segmentId != null);
    DataMapMeta meta = factory.getMeta();
    if (meta == null) {
        // if data map does not have meta, no need to register
        return;
    }
    // Reuse the meta fetched above instead of calling factory.getMeta() a second time.
    List<String> columns = meta.getIndexedColumns();
    List<DataMapWriter> writers = registry.get(columns);
    if (writers == null) {
        writers = new ArrayList<>();
        registry.put(columns, writers);
    }
    DataMapWriter writer = factory.createWriter(new Segment(segmentId, null), dataWritePath);
    writers.add(writer);
    LOG.info("DataMapWriter " + writer + " added");
}
Also used : DataMapWriter(org.apache.carbondata.core.datamap.dev.DataMapWriter) DataMapMeta(org.apache.carbondata.core.datamap.DataMapMeta) Segment(org.apache.carbondata.core.datamap.Segment)

Example 9 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

From the class CarbonUtil, method validateRangeOfSegmentList.

/**
 * Validates a comma separated segment id list as configured via
 * carbon.input.segments.&lt;database_name&gt;.&lt;table_name&gt;.
 *
 * Each entry must be "*" or a segment id whose numeric part parses to a
 * non-negative finite float.
 *
 * @param segmentId comma separated list of segment ids (or "*")
 * @return true if every entry is valid
 * @throws InvalidConfigurationException if the list is empty, out of range,
 *         or an entry is not numeric
 */
public static boolean validateRangeOfSegmentList(String segmentId) throws InvalidConfigurationException {
    String[] values = segmentId.split(",");
    try {
        if (values.length == 0) {
            // Reachable e.g. for segmentId == ",": split(",") drops trailing
            // empty tokens and yields an empty array.
            throw new InvalidConfigurationException("carbon.input.segments.<database_name>.<table_name> value can't be empty.");
        }
        for (String value : values) {
            if (!value.equalsIgnoreCase("*")) {
                Segment segment = Segment.toSegment(value);
                // Use the primitive to avoid the original's needless autoboxing into Float.
                float segmentNo = Float.parseFloat(segment.getSegmentNo());
                // The upper-bound check only rejects inputs that parse to Infinity,
                // since every finite float is <= Float.MAX_VALUE.
                if (segmentNo < 0 || segmentNo > Float.MAX_VALUE) {
                    throw new InvalidConfigurationException("carbon.input.segments.<database_name>.<table_name> value range should be greater " + "than 0 and less than " + Float.MAX_VALUE);
                }
            }
        }
    } catch (NumberFormatException nfe) {
        throw new InvalidConfigurationException("carbon.input.segments.<database_name>.<table_name> value range is not valid");
    }
    return true;
}
Also used : Segment(org.apache.carbondata.core.datamap.Segment) InvalidConfigurationException(org.apache.carbondata.core.exception.InvalidConfigurationException)

Example 10 with Segment

use of org.apache.carbondata.core.datamap.Segment in project carbondata by apache.

From the class SegmentStatusManager, method getValidAndInvalidSegments.

/**
 * Classifies segments into valid, updated, invalid, streaming and in-progress
 * buckets from the given load status details.
 *
 * @param loadMetadataDetails load details to classify; if null, the table
 *        status file is read to obtain them
 * @return a ValidAndInvalidSegmentsInfo holding all five segment lists
 * @throws IOException if reading the table status file fails
 */
public ValidAndInvalidSegmentsInfo getValidAndInvalidSegments(LoadMetadataDetails[] loadMetadataDetails) throws IOException {
    // @TODO: move reading LoadStatus file to separate class
    List<Segment> listOfValidSegments = new ArrayList<>(10);
    List<Segment> listOfValidUpdatedSegments = new ArrayList<>(10);
    List<Segment> listOfInvalidSegments = new ArrayList<>(10);
    List<Segment> listOfStreamSegments = new ArrayList<>(10);
    List<Segment> listOfInProgressSegments = new ArrayList<>(10);
    try {
        if (loadMetadataDetails == null) {
            loadMetadataDetails = readTableStatusFile(CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
        }
        // just directly iterate Array
        for (LoadMetadataDetails segment : loadMetadataDetails) {
            // First group: statuses that may produce a valid/updated/stream entry.
            if (SegmentStatus.SUCCESS == segment.getSegmentStatus() || SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus() || SegmentStatus.LOAD_PARTIAL_SUCCESS == segment.getSegmentStatus() || SegmentStatus.STREAMING == segment.getSegmentStatus() || SegmentStatus.STREAMING_FINISH == segment.getSegmentStatus()) {
                // check for merged loads.
                if (null != segment.getMergedLoadName()) {
                    // Merged loads are keyed by the merged load name, de-duplicated.
                    Segment seg = new Segment(segment.getMergedLoadName(), segment.getSegmentFile());
                    if (!listOfValidSegments.contains(seg)) {
                        listOfValidSegments.add(seg);
                    }
                    // if merged load is updated then put it in updated list
                    if (SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()) {
                        listOfValidUpdatedSegments.add(seg);
                    }
                    // Merged loads never fall through to the non-merged handling below.
                    continue;
                }
                if (SegmentStatus.MARKED_FOR_UPDATE == segment.getSegmentStatus()) {
                    listOfValidUpdatedSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
                }
                // Streaming segments go only to the stream list, not the valid list
                // (note the continue below).
                if (SegmentStatus.STREAMING == segment.getSegmentStatus() || SegmentStatus.STREAMING_FINISH == segment.getSegmentStatus()) {
                    listOfStreamSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
                    continue;
                }
                listOfValidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
            } else if ((SegmentStatus.LOAD_FAILURE == segment.getSegmentStatus() || SegmentStatus.COMPACTED == segment.getSegmentStatus() || SegmentStatus.MARKED_FOR_DELETE == segment.getSegmentStatus())) {
                // Failed, compacted and delete-marked loads are invalid.
                listOfInvalidSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
            } else if (SegmentStatus.INSERT_IN_PROGRESS == segment.getSegmentStatus() || SegmentStatus.INSERT_OVERWRITE_IN_PROGRESS == segment.getSegmentStatus()) {
                // Loads still being written are tracked separately.
                listOfInProgressSegments.add(new Segment(segment.getLoadName(), segment.getSegmentFile()));
            }
        }
    } catch (IOException e) {
        // Log and rethrow so the caller sees the original failure.
        LOG.error(e);
        throw e;
    }
    return new ValidAndInvalidSegmentsInfo(listOfValidSegments, listOfValidUpdatedSegments, listOfInvalidSegments, listOfStreamSegments, listOfInProgressSegments);
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) Segment(org.apache.carbondata.core.datamap.Segment)

Aggregations

Segment (org.apache.carbondata.core.datamap.Segment)23 ArrayList (java.util.ArrayList)10 IOException (java.io.IOException)8 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)8 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)8 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)6 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)6 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)5 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)5 CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit)4 InputSplit (org.apache.hadoop.mapreduce.InputSplit)4 SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore)3 Expression (org.apache.carbondata.core.scan.expression.Expression)3 SingleTableProvider (org.apache.carbondata.core.scan.filter.SingleTableProvider)3 TableProvider (org.apache.carbondata.core.scan.filter.TableProvider)3 FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)3 BitSet (java.util.BitSet)2 HashSet (java.util.HashSet)2 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)2 ICarbonLock (org.apache.carbondata.core.locks.ICarbonLock)2