
Example 1 with LongCounter

Use of org.apache.flink.api.common.accumulators.LongCounter in project flink by apache.

The class DistCp, method main:

public static void main(String[] args) throws Exception {
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    ParameterTool params = ParameterTool.fromArgs(args);
    if (!params.has("input") || !params.has("output")) {
        System.err.println("Usage: --input <path> --output <path> [--parallelism <n>]");
        return;
    }
    final Path sourcePath = new Path(params.get("input"));
    final Path targetPath = new Path(params.get("output"));
    if (!isLocal(env) && !(isOnDistributedFS(sourcePath) && isOnDistributedFS(targetPath))) {
        System.out.println("In a distributed mode only HDFS input/output paths are supported");
        return;
    }
    final int parallelism = params.getInt("parallelism", 10);
    if (parallelism <= 0) {
        System.err.println("Parallelism should be greater than 0");
        return;
    }
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    env.setParallelism(parallelism);
    long startTime = System.currentTimeMillis();
    LOGGER.info("Initializing copy tasks");
    List<FileCopyTask> tasks = getCopyTasks(sourcePath);
    LOGGER.info("Copy task initialization took " + (System.currentTimeMillis() - startTime) + "ms");
    DataSet<FileCopyTask> inputTasks = new DataSource<>(env, new FileCopyTaskInputFormat(tasks), new GenericTypeInfo<>(FileCopyTask.class), "fileCopyTasks");
    FlatMapOperator<FileCopyTask, Object> res = inputTasks.flatMap(new RichFlatMapFunction<FileCopyTask, Object>() {

        private static final long serialVersionUID = 1109254230243989929L;

        private LongCounter fileCounter;

        private LongCounter bytesCounter;

        @Override
        public void open(Configuration parameters) throws Exception {
            bytesCounter = getRuntimeContext().getLongCounter(BYTES_COPIED_CNT_NAME);
            fileCounter = getRuntimeContext().getLongCounter(FILES_COPIED_CNT_NAME);
        }

        @Override
        public void flatMap(FileCopyTask task, Collector<Object> out) throws Exception {
            LOGGER.info("Processing task: " + task);
            Path outPath = new Path(targetPath, task.getRelativePath());
            FileSystem targetFs = targetPath.getFileSystem();
            // creating parent folders in case of a local FS
            if (!targetFs.isDistributedFS()) {
                // dealing with cases like file:///tmp or just /tmp
                File outFile = outPath.toUri().isAbsolute() ? new File(outPath.toUri()) : new File(outPath.toString());
                File parentFile = outFile.getParentFile();
                if (!parentFile.mkdirs() && !parentFile.exists()) {
                    throw new RuntimeException("Cannot create local file system directories: " + parentFile);
                }
            }
            FSDataOutputStream outputStream = null;
            FSDataInputStream inputStream = null;
            try {
                outputStream = targetFs.create(outPath, FileSystem.WriteMode.OVERWRITE);
                inputStream = task.getPath().getFileSystem().open(task.getPath());
                int bytes = IOUtils.copy(inputStream, outputStream);
                bytesCounter.add(bytes);
            } finally {
                IOUtils.closeQuietly(inputStream);
                IOUtils.closeQuietly(outputStream);
            }
            fileCounter.add(1L);
        }
    });
    // no data sinks are needed, therefore just printing an empty result
    res.print();
    Map<String, Object> accumulators = env.getLastJobExecutionResult().getAllAccumulatorResults();
    LOGGER.info("== COUNTERS ==");
    for (Map.Entry<String, Object> e : accumulators.entrySet()) {
        LOGGER.info(e.getKey() + ": " + e.getValue());
    }
}
Also used: ParameterTool (org.apache.flink.api.java.utils.ParameterTool), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Configuration (org.apache.flink.configuration.Configuration), LongCounter (org.apache.flink.api.common.accumulators.LongCounter), FileSystem (org.apache.flink.core.fs.FileSystem), FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream), Path (org.apache.flink.core.fs.Path), IOException (java.io.IOException), DataSource (org.apache.flink.api.java.operators.DataSource), FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream), File (java.io.File), Map (java.util.Map)
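
The DistCp code above mixes the accumulator pattern with file-copy logic, so here is a minimal, self-contained sketch of just that pattern under assumed names (the class LongCounterSketch, the counter name "records-seen", and the toy fromElements input are illustrative, not part of the Flink sources): register a LongCounter in open(), add to it per record, and read the merged total from the JobExecutionResult after execute().

import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

public class LongCounterSketch {

    // Hypothetical accumulator name, chosen for this sketch only.
    private static final String RECORDS_SEEN = "records-seen";

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.fromElements("a", "b", "c").flatMap(new RichFlatMapFunction<String, String>() {

            private LongCounter recordCounter;

            @Override
            public void open(Configuration parameters) {
                // Register the counter once per parallel subtask; Flink merges counters by name.
                recordCounter = getRuntimeContext().getLongCounter(RECORDS_SEEN);
            }

            @Override
            public void flatMap(String value, Collector<String> out) {
                recordCounter.add(1L);
                out.collect(value);
            }
        }).output(new DiscardingOutputFormat<>());
        JobExecutionResult result = env.execute("long-counter-sketch");
        // The merged total across all subtasks is available once the job has finished.
        Long recordsSeen = result.getAccumulatorResult(RECORDS_SEEN);
        System.out.println(RECORDS_SEEN + ": " + recordsSeen);
    }
}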

Example 2 with LongCounter

Use of org.apache.flink.api.common.accumulators.LongCounter in project flink by apache.

The class SubtaskExecutionAttemptAccumulatorsHandlerTest, method testHandleRequest:

@Test
public void testHandleRequest() throws Exception {
    // Instantiate the handler.
    final RestHandlerConfiguration restHandlerConfiguration = RestHandlerConfiguration.fromConfiguration(new Configuration());
    final SubtaskExecutionAttemptAccumulatorsHandler handler = new SubtaskExecutionAttemptAccumulatorsHandler(() -> null, Time.milliseconds(100L), Collections.emptyMap(), SubtaskExecutionAttemptAccumulatorsHeaders.getInstance(), new DefaultExecutionGraphCache(restHandlerConfiguration.getTimeout(), Time.milliseconds(restHandlerConfiguration.getRefreshInterval())), TestingUtils.defaultExecutor());
    // Instantiate an empty request.
    final HandlerRequest<EmptyRequestBody> request = HandlerRequest.create(EmptyRequestBody.getInstance(), new SubtaskAttemptMessageParameters());
    final Map<String, OptionalFailure<Accumulator<?, ?>>> userAccumulators = new HashMap<>(3);
    userAccumulators.put("IntCounter", OptionalFailure.of(new IntCounter(10)));
    userAccumulators.put("LongCounter", OptionalFailure.of(new LongCounter(100L)));
    userAccumulators.put("Failure", OptionalFailure.ofFailure(new FlinkRuntimeException("Test")));
    // Instantiate the expected result.
    final StringifiedAccumulatorResult[] accumulatorResults = StringifiedAccumulatorResult.stringifyAccumulatorResults(userAccumulators);
    final int attemptNum = 1;
    final int subtaskIndex = 2;
    // Instantiate the tested execution.
    final ArchivedExecution execution = new ArchivedExecution(accumulatorResults, null, new ExecutionAttemptID(), attemptNum, ExecutionState.FINISHED, null, null, null, subtaskIndex, new long[ExecutionState.values().length]);
    // Invoke the tested method.
    final SubtaskExecutionAttemptAccumulatorsInfo accumulatorsInfo = handler.handleRequest(request, execution);
    final ArrayList<UserAccumulator> userAccumulatorList = new ArrayList<>(userAccumulators.size());
    for (StringifiedAccumulatorResult accumulatorResult : accumulatorResults) {
        userAccumulatorList.add(new UserAccumulator(accumulatorResult.getName(), accumulatorResult.getType(), accumulatorResult.getValue()));
    }
    final SubtaskExecutionAttemptAccumulatorsInfo expected = new SubtaskExecutionAttemptAccumulatorsInfo(subtaskIndex, attemptNum, execution.getAttemptId().toString(), userAccumulatorList);
    // Verify.
    assertEquals(expected, accumulatorsInfo);
}
Also used: RestHandlerConfiguration (org.apache.flink.runtime.rest.handler.RestHandlerConfiguration), ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID), Configuration (org.apache.flink.configuration.Configuration), HashMap (java.util.HashMap), SubtaskAttemptMessageParameters (org.apache.flink.runtime.rest.messages.job.SubtaskAttemptMessageParameters), StringifiedAccumulatorResult (org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult), ArrayList (java.util.ArrayList), ArchivedExecution (org.apache.flink.runtime.executiongraph.ArchivedExecution), SubtaskExecutionAttemptAccumulatorsInfo (org.apache.flink.runtime.rest.messages.job.SubtaskExecutionAttemptAccumulatorsInfo), EmptyRequestBody (org.apache.flink.runtime.rest.messages.EmptyRequestBody), LongCounter (org.apache.flink.api.common.accumulators.LongCounter), UserAccumulator (org.apache.flink.runtime.rest.messages.job.UserAccumulator), FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException), DefaultExecutionGraphCache (org.apache.flink.runtime.rest.handler.legacy.DefaultExecutionGraphCache), OptionalFailure (org.apache.flink.util.OptionalFailure), IntCounter (org.apache.flink.api.common.accumulators.IntCounter), Test (org.junit.Test)
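
The assertion in this test turns on how accumulators are stringified for the REST response. The sketch below isolates that step using only calls the test itself makes; the class name StringifySketch and the counter name "bytes-copied" are made up for illustration.

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult;
import org.apache.flink.util.OptionalFailure;

public class StringifySketch {

    public static void main(String[] args) {
        Map<String, OptionalFailure<Accumulator<?, ?>>> userAccumulators = new HashMap<>();
        // Illustrative counter name and value; any accumulator type is handled the same way.
        userAccumulators.put("bytes-copied", OptionalFailure.of(new LongCounter(42L)));

        StringifiedAccumulatorResult[] results =
                StringifiedAccumulatorResult.stringifyAccumulatorResults(userAccumulators);
        for (StringifiedAccumulatorResult result : results) {
            // getType() is the accumulator's simple class name, getValue() its stringified value.
            System.out.println(result.getName() + " (" + result.getType() + "): " + result.getValue());
        }
    }
}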

Example 3 with LongCounter

Use of org.apache.flink.api.common.accumulators.LongCounter in project flink by apache.

The class MiscellaneousIssuesITCase, method testAccumulatorsAfterNoOp:

@Test
public void testAccumulatorsAfterNoOp() {
    final String accName = "test_accumulator";
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);
        env.generateSequence(1, 1000000).rebalance().flatMap(new RichFlatMapFunction<Long, Long>() {

            private LongCounter counter;

            @Override
            public void open(Configuration parameters) {
                counter = getRuntimeContext().getLongCounter(accName);
            }

            @Override
            public void flatMap(Long value, Collector<Long> out) {
                counter.add(1L);
            }
        }).output(new DiscardingOutputFormat<Long>());
        JobExecutionResult result = env.execute();
        assertEquals(1000000L, result.getAllAccumulatorResults().get(accName));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: JobExecutionResult (org.apache.flink.api.common.JobExecutionResult), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Configuration (org.apache.flink.configuration.Configuration), MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration), RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction), Collector (org.apache.flink.util.Collector), LongCounter (org.apache.flink.api.common.accumulators.LongCounter), JobExecutionException (org.apache.flink.runtime.client.JobExecutionException), Test (org.junit.Test)
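
The test expects the per-subtask counters registered under accName to merge into a single total of 1,000,000. As a rough illustration of that merge, here is a sketch of LongCounter's accumulator API in isolation; the class name MergeSketch and the values are invented for the example.

import org.apache.flink.api.common.accumulators.LongCounter;

public class MergeSketch {

    public static void main(String[] args) {
        // Two counters standing in for two parallel subtasks registered under the same name.
        LongCounter subtask0 = new LongCounter();
        LongCounter subtask1 = new LongCounter();
        subtask0.add(3L);
        subtask1.add(4L);

        // At job completion Flink merges same-named accumulators; merge() adds the other counter's value.
        subtask0.merge(subtask1);
        // Prints 7: the sum of both local values.
        System.out.println(subtask0.getLocalValue());
    }
}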

Aggregations

LongCounter (org.apache.flink.api.common.accumulators.LongCounter): 3
Configuration (org.apache.flink.configuration.Configuration): 3
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 2
Test (org.junit.Test): 2
File (java.io.File): 1
IOException (java.io.IOException): 1
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
JobExecutionResult (org.apache.flink.api.common.JobExecutionResult): 1
IntCounter (org.apache.flink.api.common.accumulators.IntCounter): 1
RichFlatMapFunction (org.apache.flink.api.common.functions.RichFlatMapFunction): 1
DataSource (org.apache.flink.api.java.operators.DataSource): 1
ParameterTool (org.apache.flink.api.java.utils.ParameterTool): 1
FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream): 1
FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream): 1
FileSystem (org.apache.flink.core.fs.FileSystem): 1
Path (org.apache.flink.core.fs.Path): 1
StringifiedAccumulatorResult (org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult): 1
JobExecutionException (org.apache.flink.runtime.client.JobExecutionException): 1