
Example 46 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class BlockletIndexUtil method createCarbonDataFileBlockMetaInfoMapping.

/**
 * Creates a mapping from carbondata file path to its block meta info for the given segment.
 * Listing the segment once reduces the number of NameNode calls, since a single listing
 * call can fetch up to 1000 entries.
 *
 * @param segmentFilePath path of the segment directory to list
 * @param configuration   hadoop configuration used to resolve the file system
 * @return map of carbondata file path to its BlockMetaInfo
 * @throws IOException if listing the segment directory fails
 */
public static Map<String, BlockMetaInfo> createCarbonDataFileBlockMetaInfoMapping(String segmentFilePath, Configuration configuration) throws IOException {
    Map<String, BlockMetaInfo> fileNameToMetaInfoMapping = new TreeMap<>();
    CarbonFile carbonFile = FileFactory.getCarbonFile(segmentFilePath, configuration);
    if (carbonFile instanceof AbstractDFSCarbonFile && !(carbonFile instanceof S3CarbonFile)) {
        // accept only carbondata data files (.carbondata)
        PathFilter pathFilter = path -> CarbonTablePath.isCarbonDataFile(path.getName());
        CarbonFile[] carbonFiles = carbonFile.locationAwareListFiles(pathFilter);
        for (CarbonFile file : carbonFiles) {
            String[] location = file.getLocations();
            long len = file.getSize();
            BlockMetaInfo blockMetaInfo = new BlockMetaInfo(location, len);
            fileNameToMetaInfoMapping.put(file.getPath(), blockMetaInfo);
        }
    }
    return fileNameToMetaInfoMapping;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) S3CarbonFile(org.apache.carbondata.core.datastore.filesystem.S3CarbonFile) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) AbstractDFSCarbonFile(org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile) PathFilter(org.apache.hadoop.fs.PathFilter) TreeMap(java.util.TreeMap) BlockMetaInfo(org.apache.carbondata.core.indexstore.BlockMetaInfo)
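
A minimal usage sketch (not from the source listing) showing how the mapping could be consumed. The segment path is a placeholder, and the import location of BlockletIndexUtil is an assumption that may vary across CarbonData versions.

import java.util.Map;

import org.apache.carbondata.core.indexstore.BlockMetaInfo;
import org.apache.carbondata.core.util.BlockletIndexUtil; // import path assumed; adjust to your version
import org.apache.hadoop.conf.Configuration;

public class SegmentBlockMetaInfoExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // placeholder segment directory; replace with a real segment path
        String segmentPath = "hdfs://namenode:8020/store/default/t1/Fact/Part0/Segment_0";
        Map<String, BlockMetaInfo> mapping =
                BlockletIndexUtil.createCarbonDataFileBlockMetaInfoMapping(segmentPath, conf);
        // one location-aware listing resolved metadata for every .carbondata file in the segment
        System.out.println("carbondata files mapped: " + mapping.size());
    }
}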

Example 47 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class CarbonTestUtil method getDimRawChunk.

/**
 * Reads the raw dimension column chunks of the first carbondata file under the given store
 * path; used in tests to check whether a local dictionary was generated for the blocklets.
 *
 * @param storePath  table store path containing the carbondata files
 * @param blockindex block index passed to the reader
 * @return dimensionRawColumnChunks
 */
public static ArrayList<DimensionRawColumnChunk> getDimRawChunk(String storePath, Integer blockindex) throws IOException {
    CarbonFile[] dataFiles = FileFactory.getCarbonFile(storePath).listFiles(new CarbonFileFilter() {

        @Override
        public boolean accept(CarbonFile file) {
            // keep only carbondata fact files (.carbondata)
            return file.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
        }
    });
    // read the raw dimension chunks of the first data file for the requested block index
    return read(dataFiles[0].getAbsolutePath(), blockindex);
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) DimensionRawColumnChunk(org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk)
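
A hedged usage sketch for the test utility above; the store path is a placeholder, and the CarbonTestUtil import is omitted because the test utility's package depends on the CarbonData version and module layout.

import java.util.ArrayList;

import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
// import of CarbonTestUtil omitted: the test utility's package depends on the module layout

public class DimRawChunkExample {
    public static void main(String[] args) throws Exception {
        // placeholder table store path; replace with the actual table location
        String storePath = "/tmp/carbon-store/default/t1";
        ArrayList<DimensionRawColumnChunk> chunks = CarbonTestUtil.getDimRawChunk(storePath, 0);
        System.out.println("raw dimension chunks for block 0: " + chunks.size());
    }
}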

Example 48 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class StageInputCollector method createInputSplits.

/**
 * Read stage files and create input splits from them
 */
public static List<InputSplit> createInputSplits(ExecutorService executorService, List<CarbonFile> stageFiles) throws ExecutionException, InterruptedException {
    Objects.requireNonNull(executorService);
    Objects.requireNonNull(stageFiles);
    long startTime = System.currentTimeMillis();
    List<InputSplit> output = Collections.synchronizedList(new ArrayList<>());
    Gson gson = new Gson();
    // read each stage file and create input split
    // read them using a thread pool to increase parallelism
    List<Future<Boolean>> futures = stageFiles.stream().map(stageFile -> executorService.submit(() -> {
        String filePath = stageFile.getAbsolutePath();
        InputStreamReader reader = null;
        try {
            reader = new InputStreamReader(FileFactory.getDataInputStream(filePath));
            StageInput stageInput = gson.fromJson(reader, StageInput.class);
            output.addAll(stageInput.createSplits());
            return true;
        } catch (IOException e) {
            LOGGER.error("failed to read stage file " + filePath);
            return false;
        } finally {
            IOUtils.closeQuietly(reader);
        }
    })).collect(Collectors.toList());
    for (Future<Boolean> future : futures) {
        future.get();
    }
    LOGGER.info("read stage files taken " + (System.currentTimeMillis() - startTime) + "ms");
    return output;
}
Also used : Arrays(java.util.Arrays) HashMap(java.util.HashMap) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) Future(java.util.concurrent.Future) Gson(com.google.gson.Gson) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) SUCCESS_FILE_SUFFIX(org.apache.carbondata.core.util.path.CarbonTablePath.SUCCESS_FILE_SUFFIX) LinkedList(java.util.LinkedList) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) ExecutorService(java.util.concurrent.ExecutorService) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Objects(java.util.Objects) ExecutionException(java.util.concurrent.ExecutionException) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Collections(java.util.Collections)
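
A sketch of driving createInputSplits with a fixed-size thread pool. The stage file list is assumed to have been collected beforehand (for example by collectStageFiles in the next example), and the StageInputCollector import is omitted because its package depends on the CarbonData module layout.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.hadoop.mapreduce.InputSplit;
// import of StageInputCollector omitted: its package depends on the CarbonData module layout

public class StageSplitExample {
    public static void main(String[] args) throws Exception {
        List<CarbonFile> stageFiles = new ArrayList<>(); // populated elsewhere, e.g. by collectStageFiles
        ExecutorService pool = Executors.newFixedThreadPool(4); // pool size chosen arbitrarily here
        try {
            List<InputSplit> splits = StageInputCollector.createInputSplits(pool, stageFiles);
            System.out.println("input splits created: " + splits.size());
        } finally {
            pool.shutdown();
        }
    }
}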

Example 49 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class StageInputCollector method collectStageFiles.

/**
 * Collect all stage files and their matching success files.
 * A stage file without a corresponding success file will not be collected.
 */
public static void collectStageFiles(CarbonTable table, Configuration hadoopConf, List<CarbonFile> stageInputList, List<CarbonFile> successFileList) {
    Objects.requireNonNull(table);
    Objects.requireNonNull(hadoopConf);
    Objects.requireNonNull(stageInputList);
    Objects.requireNonNull(successFileList);
    CarbonFile dir = FileFactory.getCarbonFile(table.getStagePath(), hadoopConf);
    if (dir.exists()) {
        // list the stage folder and collect all stage files that have a corresponding
        // success file, which means the file has been committed
        CarbonFile[] allFiles = dir.listFiles();
        Map<String, CarbonFile> map = new HashMap<>();
        Arrays.stream(allFiles).filter(file -> file.getName().endsWith(SUCCESS_FILE_SUFFIX)).forEach(file -> map.put(file.getName().substring(0, file.getName().indexOf(".")), file));
        Arrays.stream(allFiles).filter(file -> !file.getName().endsWith(SUCCESS_FILE_SUFFIX)).filter(file -> map.containsKey(file.getName())).forEach(carbonFile -> {
            stageInputList.add(carbonFile);
            successFileList.add(map.get(carbonFile.getName()));
        });
    }
}
Also used : Arrays(java.util.Arrays) HashMap(java.util.HashMap) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) Future(java.util.concurrent.Future) Gson(com.google.gson.Gson) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) SUCCESS_FILE_SUFFIX(org.apache.carbondata.core.util.path.CarbonTablePath.SUCCESS_FILE_SUFFIX) LinkedList(java.util.LinkedList) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) ExecutorService(java.util.concurrent.ExecutorService) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Objects(java.util.Objects) ExecutionException(java.util.concurrent.ExecutionException) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Collections(java.util.Collections)
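
A sketch of the collect-then-split flow, assuming a resolved CarbonTable handle; how that handle is obtained is outside this snippet, and the StageInputCollector import is again omitted for the same reason as above.

import java.util.ArrayList;
import java.util.List;

import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.hadoop.conf.Configuration;

public class CollectStageFilesExample {
    public static List<CarbonFile> committedStageFiles(CarbonTable table, Configuration hadoopConf) {
        List<CarbonFile> stageInputs = new ArrayList<>();
        List<CarbonFile> successFiles = new ArrayList<>();
        // only stage files that have a matching success file end up in stageInputs
        StageInputCollector.collectStageFiles(table, hadoopConf, stageInputs, successFiles);
        return stageInputs;
    }
}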

Example 50 with CarbonFile

use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.

the class SegmentUpdateStatusManager method getDeleteDeltaInvalidFilesList.

/**
 * Returns the delete delta files of the given block. When needCompleteList is true, the
 * complete list of delta files for the block is returned; otherwise only the invalid delta
 * files are returned (files outside the block's delta start/end timestamps, or, for aborted
 * loads, files written after the end timestamp).
 *
 * @param block            segment update details of the block
 * @param needCompleteList whether to return the complete delta file list of the block
 * @param allSegmentFiles  all files of the segment to scan
 * @param isAbortedFile    whether to collect only delta files written after the end timestamp
 * @return matching delete delta files
 */
public CarbonFile[] getDeleteDeltaInvalidFilesList(final SegmentUpdateDetails block, final boolean needCompleteList, CarbonFile[] allSegmentFiles, boolean isAbortedFile) {
    final long deltaStartTimestamp = getStartTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, block);
    final long deltaEndTimestamp = getEndTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, block);
    Set<CarbonFile> files = new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    for (CarbonFile eachFile : allSegmentFiles) {
        String fileName = eachFile.getName();
        if (fileName.endsWith(CarbonCommonConstants.DELETE_DELTA_FILE_EXT)) {
            String blkName = CarbonTablePath.DataFileUtil.getBlockNameFromDeleteDeltaFile(fileName);
            // complete list of delta files of that block is returned.
            if (needCompleteList && block.getBlockName().equalsIgnoreCase(blkName)) {
                files.add(eachFile);
            }
            // invalid delete delta files only will be returned.
            long timestamp = CarbonUpdateUtil.getTimeStampAsLong(CarbonTablePath.DataFileUtil.getTimeStampFromDeleteDeltaFile(fileName));
            if (block.getBlockName().equalsIgnoreCase(blkName)) {
                if (isAbortedFile) {
                    if (timestamp > deltaEndTimestamp) {
                        files.add(eachFile);
                    }
                } else if (timestamp < deltaStartTimestamp || timestamp > deltaEndTimestamp) {
                    files.add(eachFile);
                }
            }
        }
    }
    return files.toArray(new CarbonFile[files.size()]);
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile)
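
An illustrative sketch only: obtaining the SegmentUpdateDetails block and the status manager instance is not shown in the snippet above, so they appear here as parameters; the SegmentUpdateDetails import path is an assumption.

import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.mutate.SegmentUpdateDetails; // import path assumed
import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager;

public class DeleteDeltaInvalidFilesExample {
    public static CarbonFile[] invalidDeltaFiles(SegmentUpdateStatusManager statusManager,
            SegmentUpdateDetails block, String segmentDir) {
        // list every file of the segment; the method itself filters for delete delta files
        CarbonFile[] allSegmentFiles = FileFactory.getCarbonFile(segmentDir).listFiles();
        // needCompleteList=false, isAbortedFile=false: return only delta files whose timestamp
        // falls outside the block's delta start/end range
        return statusManager.getDeleteDeltaInvalidFilesList(block, false, allSegmentFiles, false);
    }
}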

Aggregations

CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile) 161
IOException (java.io.IOException) 47
CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) 45
ArrayList (java.util.ArrayList) 38
HashMap (java.util.HashMap) 20
FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory) 18
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath) 18
Path (org.apache.hadoop.fs.Path) 15
List (java.util.List) 11
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable) 11
Map (java.util.Map) 10
HashSet (java.util.HashSet) 9
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails) 9
LinkedList (java.util.LinkedList) 6
BlockIndex (org.apache.carbondata.format.BlockIndex) 6
Segment (org.apache.carbondata.core.index.Segment) 5
CarbonIndexFileReader (org.apache.carbondata.core.reader.CarbonIndexFileReader) 5
Configuration (org.apache.hadoop.conf.Configuration) 5
FileSystem (org.apache.hadoop.fs.FileSystem) 5
Test (org.junit.Test) 5