Example 6 with HadoopTaskInput

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInput in project ignite by apache.

The class HadoopRunnableTask, method runTask.

/**
 * @param perfCntr Performance counter.
 * @throws IgniteCheckedException If failed.
 */
private void runTask(HadoopPerformanceCounter perfCntr) throws IgniteCheckedException {
    if (cancelled)
        throw new HadoopTaskCancelledException("Task cancelled.");
    // Try-with-resources closes both resources, in reverse declaration order, even if the task fails.
    try (HadoopTaskOutput out = createOutputInternal(ctx);
        HadoopTaskInput in = createInputInternal(ctx)) {
        ctx.input(in);
        ctx.output(out);
        perfCntr.onTaskStart(ctx.taskInfo(), U.currentTimeMillis());
        ctx.run();
    }
}
Also used: HadoopTaskInput (org.apache.ignite.internal.processors.hadoop.HadoopTaskInput), HadoopTaskCancelledException (org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException), HadoopTaskOutput (org.apache.ignite.internal.processors.hadoop.HadoopTaskOutput)
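
The same HadoopTaskInput iteration contract runs through all of these examples: next() advances to the next distinct key, key() returns that key, values() iterates the values grouped under it, and close() releases the underlying resources. A minimal consumption sketch, assuming a hypothetical process(key, value) consumer that is not part of the Ignite API (java.util.Iterator is the only other import needed):

static void drain(HadoopTaskInput in) throws Exception {
    try {
        // next() positions the input on the next distinct key.
        while (in.next()) {
            Object key = in.key();

            // values() iterates every value grouped under the current key.
            for (Iterator<?> it = in.values(); it.hasNext(); )
                process(key, it.next());  // Hypothetical consumer, for illustration only.
        }
    }
    finally {
        in.close();  // Release whatever resources back the input.
    }
}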

Example 7 with HadoopTaskInput

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInput in project ignite by apache.

The class HadoopV1ReduceTask, method run.

/**
 * {@inheritDoc}
 */
@SuppressWarnings("unchecked")
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopJobEx job = taskCtx.job();
    HadoopV2TaskContext taskCtx0 = (HadoopV2TaskContext) taskCtx;
    // When this task runs as a combiner inside a mapper, propagate the mapper index to the thread.
    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();
    try {
        JobConf jobConf = taskCtx0.jobConf();
        HadoopTaskInput input = taskCtx.input();
        HadoopV1OutputCollector collector = null;
        try {
            collector = collector(jobConf, taskCtx0, reduce || !job.info().hasReducer(), fileName(), taskCtx0.attemptId());
            Reducer reducer;
            if (reduce)
                reducer = ReflectionUtils.newInstance(jobConf.getReducerClass(), jobConf);
            else
                reducer = ReflectionUtils.newInstance(jobConf.getCombinerClass(), jobConf);
            assert reducer != null;
            try {
                try {
                    while (input.next()) {
                        if (isCancelled())
                            throw new HadoopTaskCancelledException("Reduce task cancelled.");
                        reducer.reduce(input.key(), input.values(), collector, Reporter.NULL);
                    }
                    if (!reduce)
                        taskCtx.onMapperFinished();
                } finally {
                    reducer.close();
                }
            } finally {
                collector.closeWriter();
            }
            collector.commit();
        } catch (Exception e) {
            if (collector != null)
                collector.abort();
            throw new IgniteCheckedException(e);
        }
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();
    }
}
Also used: HadoopTaskInput (org.apache.ignite.internal.processors.hadoop.HadoopTaskInput), IgniteCheckedException (org.apache.ignite.IgniteCheckedException), HadoopJobEx (org.apache.ignite.internal.processors.hadoop.HadoopJobEx), HadoopTaskCancelledException (org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException), Reducer (org.apache.hadoop.mapred.Reducer), JobConf (org.apache.hadoop.mapred.JobConf), HadoopV2TaskContext (org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext)
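
The while loop in this example feeds input.key() and input.values() straight into a Reducer from the legacy org.apache.hadoop.mapred API, so it drives reducers and combiners alike. For context, a minimal reducer of the kind this task would execute; SumReducer is an illustrative class, not part of Ignite or Hadoop:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/** Illustrative mapred-API reducer: sums all values grouped under each key. */
public class SumReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
    @Override public void reduce(Text key, Iterator<IntWritable> values,
        OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
        int sum = 0;

        while (values.hasNext())
            sum += values.next().get();

        out.collect(key, new IntWritable(sum));
    }
}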

Example 8 with HadoopTaskInput

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInput in project ignite by apache.

The class HadoopHashMapSelfTest, method check.

/**
 * Checks that the multimap contents match the expected key-to-values model.
 *
 * @param m Multimap to check.
 * @param mm Expected model.
 * @param taskCtx Task context.
 * @throws Exception If failed.
 */
private void check(HadoopHashMultimap m, Multimap<Integer, Integer> mm, HadoopTaskContext taskCtx) throws Exception {
    final HadoopTaskInput in = m.input(taskCtx);
    Map<Integer, Collection<Integer>> mmm = mm.asMap();
    int keys = 0;
    while (in.next()) {
        keys++;
        IntWritable k = (IntWritable) in.key();
        assertNotNull(k);
        ArrayList<Integer> vs = new ArrayList<>();
        Iterator<?> it = in.values();
        while (it.hasNext()) vs.add(((IntWritable) it.next()).get());
        Collection<Integer> exp = mmm.get(k.get());
        assertEquals(sorted(exp), sorted(vs));
    }
    X.println("keys: " + keys + " cap: " + m.capacity());
    assertEquals(mmm.size(), keys);
    assertEquals(m.keys(), keys);
    in.close();
}
Also used: HadoopTaskInput (org.apache.ignite.internal.processors.hadoop.HadoopTaskInput), ArrayList (java.util.ArrayList), Collection (java.util.Collection), IntWritable (org.apache.hadoop.io.IntWritable)
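
The sorted(...) helper the assertions call is not part of this excerpt. Assuming it only normalizes a collection into a sorted list so both sides compare deterministically, a plausible version is:

/** Hypothetical helper (not shown in the excerpt): normalizes a collection to a sorted list. */
private static List<Integer> sorted(Collection<Integer> c) {
    List<Integer> list = new ArrayList<>(c);
    Collections.sort(list);
    return list;
}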

Example 9 with HadoopTaskInput

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskInput in project ignite by apache.

The class HadoopSkipListSelfTest, method testMultiThreaded.

/**
 * @throws Exception If failed.
 */
public void testMultiThreaded() throws Exception {
    GridUnsafeMemory mem = new GridUnsafeMemory(0);
    X.println("___ Started");
    Random rnd = new GridRandom();
    for (int i = 0; i < 20; i++) {
        HadoopJobInfo job = new JobInfo();
        final HadoopTaskContext taskCtx = new TaskContext();
        final HadoopMultimap m = new HadoopSkipList(job, mem);
        final ConcurrentMap<Integer, Collection<Integer>> mm = new ConcurrentHashMap<>();
        X.println("___ MT");
        multithreaded(new Callable<Object>() {

            @Override
            public Object call() throws Exception {
                X.println("___ TH in");
                Random rnd = new GridRandom();
                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();
                HadoopMultimap.Adder a = m.startAdding(taskCtx);
                for (int i = 0; i < 50000; i++) {
                    int k = rnd.nextInt(32000);
                    int v = rnd.nextInt();
                    key.set(k);
                    val.set(v);
                    a.write(key, val);
                    Collection<Integer> list = mm.get(k);
                    if (list == null) {
                        list = new ConcurrentLinkedQueue<>();
                        Collection<Integer> old = mm.putIfAbsent(k, list);
                        if (old != null)
                            list = old;
                    }
                    list.add(v);
                }
                a.close();
                X.println("___ TH out");
                return null;
            }
        }, 3 + rnd.nextInt(27));
        HadoopTaskInput in = m.input(taskCtx);
        // The skip list must return keys in strictly ascending order.
        int prevKey = Integer.MIN_VALUE;
        while (in.next()) {
            IntWritable key = (IntWritable) in.key();
            assertTrue(key.get() > prevKey);
            prevKey = key.get();
            Iterator<?> valsIter = in.values();
            Collection<Integer> vals = mm.remove(key.get());
            assertNotNull(vals);
            while (valsIter.hasNext()) {
                IntWritable val = (IntWritable) valsIter.next();
                assertTrue(vals.remove(val.get()));
            }
            assertTrue(vals.isEmpty());
        }
        in.close();
        m.close();
        assertEquals(0, mem.allocatedSize());
    }
}
Also used: HadoopJobInfo (org.apache.ignite.internal.processors.hadoop.HadoopJobInfo), HadoopTaskInput (org.apache.ignite.internal.processors.hadoop.HadoopTaskInput), HadoopTaskContext (org.apache.ignite.internal.processors.hadoop.HadoopTaskContext), HadoopMultimap (org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopMultimap), HadoopSkipList (org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopSkipList), IOException (java.io.IOException), Random (java.util.Random), GridRandom (org.apache.ignite.internal.util.GridRandom), Collection (java.util.Collection), GridUnsafeMemory (org.apache.ignite.internal.util.offheap.unsafe.GridUnsafeMemory), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue), IntWritable (org.apache.hadoop.io.IntWritable)
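
A side note on the writer thread in this test: the putIfAbsent block that populates mm predates Map.computeIfAbsent. On Java 8 and later, the same thread-safe accumulation collapses to a single atomic call on the ConcurrentHashMap:

// Equivalent to the putIfAbsent block in the test above (Java 8+).
mm.computeIfAbsent(k, x -> new ConcurrentLinkedQueue<>()).add(v);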

Aggregations

HadoopTaskInput (org.apache.ignite.internal.processors.hadoop.HadoopTaskInput): 9 usages
Collection (java.util.Collection): 5 usages
IntWritable (org.apache.hadoop.io.IntWritable): 5 usages
IOException (java.io.IOException): 4 usages
HadoopMultimap (org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopMultimap): 3 usages
LinkedList (java.util.LinkedList): 2 usages
Random (java.util.Random): 2 usages
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2 usages
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 2 usages
Writable (org.apache.hadoop.io.Writable): 2 usages
HadoopJobInfo (org.apache.ignite.internal.processors.hadoop.HadoopJobInfo): 2 usages
HadoopTaskCancelledException (org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException): 2 usages
HadoopTaskContext (org.apache.ignite.internal.processors.hadoop.HadoopTaskContext): 2 usages
HadoopConcurrentHashMultimap (org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopConcurrentHashMultimap): 2 usages
GridRandom (org.apache.ignite.internal.util.GridRandom): 2 usages
GridDataInput (org.apache.ignite.internal.util.io.GridDataInput): 2 usages
GridUnsafeDataInput (org.apache.ignite.internal.util.io.GridUnsafeDataInput): 2 usages
GridUnsafeMemory (org.apache.ignite.internal.util.offheap.unsafe.GridUnsafeMemory): 2 usages
DataInput (java.io.DataInput): 1 usage
ArrayList (java.util.ArrayList): 1 usage