Search in sources :

Example 6 with HadoopTaskContext

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskContext in the Ignite project by Apache.

From the class HadoopSkipListSelfTest, method testMultiThreaded.

/**
 * Runs 20 rounds in which several threads concurrently add random integer pairs to a fresh
 * {@link HadoopSkipList}, mirroring every pair into a reference map. After each round the multimap
 * input is read back and checked against the reference map: keys must be strictly increasing and
 * every value must be accounted for. Finally all off-heap memory must have been released.
 *
 * @throws Exception If failed.
 */
public void testMultiThreaded() throws Exception {
    GridUnsafeMemory mem = new GridUnsafeMemory(0);

    X.println("___ Started");

    // Used only to choose the number of writer threads for each round.
    Random threadCntRnd = new GridRandom();

    for (int round = 0; round < 20; round++) {
        HadoopJobInfo job = new JobInfo();

        final HadoopTaskContext taskCtx = new TaskContext();
        final HadoopMultimap multimap = new HadoopSkipList(job, mem);

        // Reference copy of everything the writers put into the multimap.
        final ConcurrentMap<Integer, Collection<Integer>> expected = new ConcurrentHashMap<>();

        X.println("___ MT");

        Callable<Object> writer = new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                X.println("___ TH in");

                Random dataRnd = new GridRandom();

                // Writables are reused for every pair written by this thread.
                IntWritable keyBuf = new IntWritable();
                IntWritable valBuf = new IntWritable();

                HadoopMultimap.Adder adder = multimap.startAdding(taskCtx);

                for (int n = 0; n < 50000; n++) {
                    int k = dataRnd.nextInt(32000);
                    int v = dataRnd.nextInt();

                    keyBuf.set(k);
                    valBuf.set(v);

                    adder.write(keyBuf, valBuf);

                    Collection<Integer> bucket = expected.get(k);

                    if (bucket == null) {
                        Collection<Integer> fresh = new ConcurrentLinkedQueue<>();
                        Collection<Integer> raced = expected.putIfAbsent(k, fresh);

                        // Another thread may have registered a bucket for this key first.
                        bucket = raced != null ? raced : fresh;
                    }

                    bucket.add(v);
                }

                adder.close();

                X.println("___ TH out");

                return null;
            }
        };

        multithreaded(writer, 3 + threadCntRnd.nextInt(27));

        HadoopTaskInput in = multimap.input(taskCtx);

        int lastKey = Integer.MIN_VALUE;

        while (in.next()) {
            IntWritable curKey = (IntWritable) in.key();

            // Keys must come out in strictly ascending order.
            assertTrue(curKey.get() > lastKey);

            lastKey = curKey.get();

            Iterator<?> actualVals = in.values();

            Collection<Integer> remaining = expected.remove(curKey.get());

            assertNotNull(remaining);

            while (actualVals.hasNext()) {
                IntWritable actual = (IntWritable) actualVals.next();

                assertTrue(remaining.remove(actual.get()));
            }

            // Every value recorded for the key must have been returned by the input.
            assertTrue(remaining.isEmpty());
        }

        in.close();
        multimap.close();

        assertEquals(0, mem.allocatedSize());
    }
}
Also used : HadoopJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopJobInfo) HadoopTaskInput(org.apache.ignite.internal.processors.hadoop.HadoopTaskInput) HadoopTaskContext(org.apache.ignite.internal.processors.hadoop.HadoopTaskContext) HadoopMultimap(org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopMultimap) IOException(java.io.IOException) GridRandom(org.apache.ignite.internal.util.GridRandom) Random(java.util.Random) GridRandom(org.apache.ignite.internal.util.GridRandom) HadoopJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopJobInfo) HadoopSkipList(org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopSkipList) HadoopTaskContext(org.apache.ignite.internal.processors.hadoop.HadoopTaskContext) Collection(java.util.Collection) GridUnsafeMemory(org.apache.ignite.internal.util.offheap.unsafe.GridUnsafeMemory) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) IntWritable(org.apache.hadoop.io.IntWritable)

Example 7 with HadoopTaskContext

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskContext in the Ignite project by Apache.

From the class HadoopShuffleJob, method onShuffleMessage.

/**
 * Handles a shuffle message: notifies the reducer's performance counter, copies the keys and
 * values carried by the message into the reducer's local multimap and then responds to the sender.
 *
 * @param src Source.
 * @param msg Message.
 * @throws IgniteCheckedException Exception.
 */
public void onShuffleMessage(T src, HadoopShuffleMessage msg) throws IgniteCheckedException {
    assert msg.buffer() != null;
    assert msg.offset() > 0;

    HadoopTaskContext taskCtx = locReducersCtx.get(msg.reducer()).get();

    HadoopPerformanceCounter.getCounter(taskCtx.counters(), null)
        .onShuffleMessage(msg.reducer(), U.currentTimeMillis());

    HadoopMultimap map = getOrCreateMap(locMaps, msg.reducer());

    // Add data from message to the map.
    try (HadoopMultimap.Adder adder = map.startAdding(taskCtx)) {
        final GridUnsafeDataInput keyIn = new GridUnsafeDataInput();
        final UnsafeValue valRef = new UnsafeValue(msg.buffer());

        msg.visit(new HadoopShuffleMessage.Visitor() {
            /** Key returned by the most recent {@code addKey} call. */
            private HadoopMultimap.Key curKey;

            @Override
            public void onKey(byte[] buf, int off, int len) throws IgniteCheckedException {
                keyIn.bytes(buf, off, off + len);

                curKey = adder.addKey(keyIn, curKey);
            }

            @Override
            public void onValue(byte[] buf, int off, int len) {
                // Point the shared value holder at the value's region before adding it.
                valRef.off = off;
                valRef.size = len;

                curKey.add(valRef);
            }
        });
    }

    if (!embedded) {
        // Response for every message.
        io.apply(src, new HadoopShuffleAck(msg.id(), msg.jobId()));

        return;
    }

    // No immediate response.
    if (localShuffleState(src).onShuffleMessage()) {
        sendFinishResponse(src, msg.jobId());
    }
}
Also used : HadoopMultimap(org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopMultimap) HadoopPerformanceCounter(org.apache.ignite.internal.processors.hadoop.counter.HadoopPerformanceCounter) GridUnsafeDataInput(org.apache.ignite.internal.util.io.GridUnsafeDataInput) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) HadoopTaskContext(org.apache.ignite.internal.processors.hadoop.HadoopTaskContext)

Example 8 with HadoopTaskContext

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskContext in the Ignite project by Apache.

From the class HadoopShuffleJob, method onDirectShuffleMessage.

/**
 * Processes a direct shuffle message: notifies the reducer's performance counter, deserializes
 * {@code msg.count()} key-value pairs from the extracted buffer into the reducer's local multimap
 * and sends a finish response once the local shuffle state reports that it is due.
 *
 * @param src Source.
 * @param msg Message.
 * @throws IgniteCheckedException Exception.
 */
public void onDirectShuffleMessage(T src, HadoopDirectShuffleMessage msg) throws IgniteCheckedException {
    byte[] data = extractBuffer(msg);

    assert data != null;

    int reducer = msg.reducer();

    HadoopTaskContext taskCtx = locReducersCtx.get(reducer).get();

    HadoopPerformanceCounter.getCounter(taskCtx.counters(), null)
        .onShuffleMessage(reducer, U.currentTimeMillis());

    HadoopMultimap map = getOrCreateMap(locMaps, reducer);

    HadoopSerialization keySer = taskCtx.keySerialization();
    HadoopSerialization valSer = taskCtx.valueSerialization();

    // Add data from message to the map.
    try (HadoopMultimap.Adder adder = map.startAdding(taskCtx)) {
        HadoopDirectDataInput in = new HadoopDirectDataInput(data);

        // The previously read objects are handed back to the serializers on each read.
        Object curKey = null;
        Object curVal = null;

        int pairsRead = 0;

        while (pairsRead < msg.count()) {
            curKey = keySer.read(in, curKey);
            curVal = valSer.read(in, curVal);

            adder.write(curKey, curVal);

            pairsRead++;
        }
    }

    if (localShuffleState(src).onShuffleMessage()) {
        sendFinishResponse(src, msg.jobId());
    }
}
Also used : HadoopDirectDataInput(org.apache.ignite.internal.processors.hadoop.shuffle.direct.HadoopDirectDataInput) HadoopMultimap(org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopMultimap) HadoopTaskContext(org.apache.ignite.internal.processors.hadoop.HadoopTaskContext) HadoopPerformanceCounter(org.apache.ignite.internal.processors.hadoop.counter.HadoopPerformanceCounter) HadoopSerialization(org.apache.ignite.internal.processors.hadoop.HadoopSerialization)

Example 9 with HadoopTaskContext

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskContext in the Ignite project by Apache.

From the class HadoopEmbeddedTaskExecutor, method run.

/**
 * {@inheritDoc}
 * <p>
 * Wraps every task info into a {@code HadoopRunnableTask}, records it in the per-job collection
 * of executing tasks and submits it to the executor. A finished task removes itself from that
 * collection and notifies the job tracker.
 */
@Override
public void run(final HadoopJobEx job, Collection<HadoopTaskInfo> tasks) throws IgniteCheckedException {
    if (log.isDebugEnabled()) {
        log.debug("Submitting tasks for local execution [locNodeId=" + ctx.localNodeId() + ", tasksCnt=" + tasks.size() + ']');
    }

    Collection<HadoopRunnableTask> registered = jobs.get(job.id());

    if (registered == null) {
        registered = new GridConcurrentHashSet<>();

        Collection<HadoopRunnableTask> prev = jobs.put(job.id(), registered);

        assert prev == null;
    }

    // Final alias so the anonymous task below can reference the collection.
    final Collection<HadoopRunnableTask> jobTasks = registered;

    for (final HadoopTaskInfo info : tasks) {
        assert info != null;

        HadoopRunnableTask task = new HadoopRunnableTask(log, job, ctx.shuffle().memory(), info, ctx.localNodeId()) {
            @Override
            protected void onTaskFinished(HadoopTaskStatus status) {
                if (log.isDebugEnabled()) {
                    log.debug("Finished task execution [jobId=" + job.id() + ", taskInfo=" + info + ", " + "waitTime=" + waitTime() + ", execTime=" + executionTime() + ']');
                }

                jobTasks.remove(this);

                jobTracker.onTaskFinished(info, status);
            }

            @Override
            protected HadoopTaskInput createInput(HadoopTaskContext taskCtx) throws IgniteCheckedException {
                return ctx.shuffle().input(taskCtx);
            }

            @Override
            protected HadoopTaskOutput createOutput(HadoopTaskContext taskCtx) throws IgniteCheckedException {
                return ctx.shuffle().output(taskCtx);
            }
        };

        // Record the task before handing it to the executor.
        jobTasks.add(task);

        exec.submit(task);
    }
}
Also used : HadoopTaskInfo(org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo) HadoopTaskContext(org.apache.ignite.internal.processors.hadoop.HadoopTaskContext)

Example 10 with HadoopTaskContext

Use of org.apache.ignite.internal.processors.hadoop.HadoopTaskContext in the Ignite project by Apache.

From the class HadoopV2Job, method getTaskContext.

/**
 * {@inheritDoc}
 * <p>
 * Returns the task context for the given task, creating it on first request. Contexts are tracked
 * in {@code ctxs} as futures keyed by task type and task number; a caller that finds a future
 * already registered for the key returns that future's result instead of building another context.
 *
 * @param info Task info.
 * @return Task context.
 * @throws IgniteCheckedException If the context could not be created.
 */
@SuppressWarnings({ "unchecked", "MismatchedQueryAndUpdateOfCollection" })
@Override
public HadoopTaskContext getTaskContext(HadoopTaskInfo info) throws IgniteCheckedException {
    // Contexts are keyed by (task type, task number).
    T2<HadoopTaskType, Integer> locTaskId = new T2<>(info.type(), info.taskNumber());
    GridFutureAdapter<HadoopTaskContext> fut = ctxs.get(locTaskId);
    // A future is already registered for this task: return its result.
    if (fut != null)
        return fut.get();
    // Register a new future; if another caller registered one first, use theirs instead.
    GridFutureAdapter<HadoopTaskContext> old = ctxs.putIfAbsent(locTaskId, fut = new GridFutureAdapter<>());
    if (old != null)
        return old.get();
    // From here on this caller owns 'fut' and completes it on every path below.
    Class<? extends HadoopTaskContext> cls = taskCtxClsPool.poll();
    try {
        if (cls == null) {
            // If there is no pooled class, then load a new one.
            // Note that the classloader is identified by the task it was initially created for,
            // but later it may be reused for other tasks.
            HadoopClassLoader ldr = sharedClsLdr != null ? sharedClsLdr : createClassLoader(HadoopClassLoader.nameForTask(info, false));
            cls = (Class<? extends HadoopTaskContext>) ldr.loadClass(HadoopV2TaskContext.class.getName());
            // NOTE(review): the class is polled from taskCtxClsPool above but added to fullCtxClsQueue
            // here - confirm how the two queues are related.
            fullCtxClsQueue.add(cls);
        }
        // The context is created reflectively through the class obtained above.
        Constructor<?> ctr = cls.getConstructor(HadoopTaskInfo.class, HadoopJobEx.class, HadoopJobId.class, UUID.class, DataInput.class);
        // Serialize the job configuration on first use; the null check is repeated under the jobConf lock.
        if (jobConfData == null)
            synchronized (jobConf) {
                if (jobConfData == null) {
                    ByteArrayOutputStream buf = new ByteArrayOutputStream();
                    jobConf.write(new DataOutputStream(buf));
                    jobConfData = buf.toByteArray();
                }
            }
        // Each context gets its own input stream over the serialized configuration bytes.
        HadoopTaskContext res = (HadoopTaskContext) ctr.newInstance(info, this, jobId, locNodeId, new DataInputStream(new ByteArrayInputStream(jobConfData)));
        fut.onDone(res);
        return res;
    } catch (Throwable e) {
        // Complete the future with the transformed exception before rethrowing.
        IgniteCheckedException te = transformException(e);
        fut.onDone(te);
        // Errors are rethrown as is; anything else is thrown as the transformed checked exception.
        if (e instanceof Error)
            throw (Error) e;
        throw te;
    }
}
Also used : HadoopClassLoader(org.apache.ignite.internal.processors.hadoop.HadoopClassLoader) HadoopTaskType(org.apache.ignite.internal.processors.hadoop.HadoopTaskType) DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) DataInputStream(java.io.DataInputStream) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) ByteArrayInputStream(java.io.ByteArrayInputStream) HadoopTaskContext(org.apache.ignite.internal.processors.hadoop.HadoopTaskContext) GridFutureAdapter(org.apache.ignite.internal.util.future.GridFutureAdapter) T2(org.apache.ignite.internal.util.typedef.T2)

Aggregations

HadoopTaskContext (org.apache.ignite.internal.processors.hadoop.HadoopTaskContext)11 Random (java.util.Random)5 IntWritable (org.apache.hadoop.io.IntWritable)5 HadoopMultimap (org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopMultimap)5 GridUnsafeMemory (org.apache.ignite.internal.util.offheap.unsafe.GridUnsafeMemory)5 HadoopJobInfo (org.apache.ignite.internal.processors.hadoop.HadoopJobInfo)4 GridRandom (org.apache.ignite.internal.util.GridRandom)4 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DataInputStream (java.io.DataInputStream)2 IOException (java.io.IOException)2 Collection (java.util.Collection)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)2 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)2 HadoopSerialization (org.apache.ignite.internal.processors.hadoop.HadoopSerialization)2 HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo)2 HadoopTaskInput (org.apache.ignite.internal.processors.hadoop.HadoopTaskInput)2 HadoopPerformanceCounter (org.apache.ignite.internal.processors.hadoop.counter.HadoopPerformanceCounter)2 HadoopConcurrentHashMultimap (org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopConcurrentHashMultimap)2 HadoopSkipList (org.apache.ignite.internal.processors.hadoop.shuffle.collections.HadoopSkipList)2