Search in sources :

Example 1 with SUCCESS_FILE_SUFFIX

Use of org.apache.carbondata.core.util.path.CarbonTablePath.SUCCESS_FILE_SUFFIX in the Apache CarbonData project.

From the class StageInputCollector, method collectStageFiles.

/**
 * Collect all stage files and their matching success files.
 * A stage file without a success file is not collected.
 *
 * <p>A success file is matched to a stage file when its name equals the stage file's name
 * followed by {@code SUCCESS_FILE_SUFFIX}. Matched pairs are appended at the same position of
 * the two output lists, in the order the stage files are returned by the directory listing.
 * If the stage directory does not exist, both lists are left untouched.
 *
 * @param table           table whose stage path is listed
 * @param hadoopConf      configuration used to resolve the stage path
 * @param stageInputList  output list receiving the committed stage files
 * @param successFileList output list receiving the success file of each collected stage file
 * @throws NullPointerException if any argument is {@code null}
 */
public static void collectStageFiles(CarbonTable table, Configuration hadoopConf, List<CarbonFile> stageInputList, List<CarbonFile> successFileList) {
    Objects.requireNonNull(table);
    Objects.requireNonNull(hadoopConf);
    Objects.requireNonNull(stageInputList);
    Objects.requireNonNull(successFileList);
    CarbonFile dir = FileFactory.getCarbonFile(table.getStagePath(), hadoopConf);
    if (!dir.exists()) {
        return;
    }
    // List the stage folder and collect every stage file that has a corresponding success file,
    // which means the file is committed.
    CarbonFile[] allFiles = dir.listFiles();

    // First pass: index the success files by the name of the stage file they belong to.
    Map<String, CarbonFile> successByStageName = new HashMap<>();
    for (CarbonFile file : allFiles) {
        String name = file.getName();
        if (name.endsWith(SUCCESS_FILE_SUFFIX)) {
            // Strip exactly the suffix rather than cutting at the first '.', so that stage file
            // names which themselves contain a '.' are still paired with the right success file.
            String stageName = name.substring(0, name.length() - SUCCESS_FILE_SUFFIX.length());
            successByStageName.put(stageName, file);
        }
    }

    // Second pass: keep only the stage files that have a success file, preserving listing order.
    for (CarbonFile file : allFiles) {
        String name = file.getName();
        if (name.endsWith(SUCCESS_FILE_SUFFIX)) {
            continue;
        }
        CarbonFile successFile = successByStageName.get(name);
        if (successFile != null) {
            stageInputList.add(file);
            successFileList.add(successFile);
        }
    }
}
Also used : Arrays(java.util.Arrays) HashMap(java.util.HashMap) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) Future(java.util.concurrent.Future) Gson(com.google.gson.Gson) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) SUCCESS_FILE_SUFFIX(org.apache.carbondata.core.util.path.CarbonTablePath.SUCCESS_FILE_SUFFIX) LinkedList(java.util.LinkedList) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) ExecutorService(java.util.concurrent.ExecutorService) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Objects(java.util.Objects) ExecutionException(java.util.concurrent.ExecutionException) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Collections(java.util.Collections) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) HashMap(java.util.HashMap)

Aggregations

Gson (com.google.gson.Gson)1 IOException (java.io.IOException)1 InputStreamReader (java.io.InputStreamReader)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorService (java.util.concurrent.ExecutorService)1 Executors (java.util.concurrent.Executors)1 Future (java.util.concurrent.Future)1 Collectors (java.util.stream.Collectors)1 LogServiceFactory (org.apache.carbondata.common.logging.LogServiceFactory)1 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)1 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)1 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)1