Search in sources :

Example 46 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtils method innerJoin.

/**
 * This util can be used to perform disk based inner join operations.
 *
 * <p>The returned iterator produces joined tuples lazily: the next result is computed
 * ahead of time (once on construction and once per {@code next()} call) and
 * {@code hasNext()} simply reports whether such a pre-computed result exists.
 *
 * <p>NOTE(review): the merge loop always advances the side that compares smaller, so both
 * inputs are presumably expected to be sorted by key in the order defined by
 * {@code comparator} - confirm against the callers.
 *
 * @param leftIt restorable iterator over the tuples of the left relation
 * @param rightIt restorable iterator over the tuples of the right relation
 * @param comparator compares a left tuple with a right tuple; 0 means the keys match
 * @return an iterator over the joined tuples; its {@code next()} returns {@code null}
 * (it does not throw) once no further joined tuple is available
 */
public static Iterator<JoinedTuple> innerJoin(RestorableIterator<Tuple<?, ?>> leftIt, RestorableIterator<Tuple<?, ?>> rightIt, KeyComparatorWrapper comparator) {
    return new Iterator<JoinedTuple>() {

        // the joined tuple that the following next() call will hand out, null when exhausted
        private JoinedTuple nextJoinTuple;

        // the pair of tuples the join is currently positioned on
        private Tuple currentLeft;

        private Tuple currentRight;

        // backup variables will hold a Tuple temporary if had to call .next()
        // once during the join operation before creating a iterator restore point.
        private Tuple backedUpLeft;

        private Tuple backedUpRight;

        // flags to mark the required side of iteration
        private boolean shouldDoLeftIterations = false;

        private boolean shouldDoRightIterations = false;

        // keeps the no of iterations done on each side of the relationship while keeping the
        // other side constant
        private int leftIterations = 0;

        private int rightIterations = 0;

        /**
         * Advances the left side while the right tuple stays fixed at currentRight.
         *
         * @return the joined tuple for the next matching left tuple, or null when left
         * iterations are not scheduled or the run of matches has ended
         */
        private JoinedTuple doLeftIteration() {
            if (!shouldDoLeftIterations) {
                return null;
            }
            JoinedTuple jtFromLeftIt = null;
            if (leftIt.hasNext()) {
                Tuple l = leftIt.next();
                // the first tuple pulled in this run is remembered whether it matches or not,
                // the merge loop in makeNextJoinTuple() consumes it as the next left tuple
                if (this.leftIterations == 0) {
                    this.backedUpLeft = l;
                }
                if (comparator.compare(l, this.currentRight) == 0) {
                    // the restore point is created only once per run, after the first match
                    if (this.leftIterations == 0) {
                        leftIt.createRestorePoint();
                    }
                    this.leftIterations++;
                    jtFromLeftIt = new JoinedTuple<>(l.getKey(), l.getValue(), this.currentRight.getValue());
                }
            }
            /*
             if this is the end of left iteration(jtFromLeftIt == null), configure the right
             iterations to run next and restore left iterator
            */
            if (jtFromLeftIt == null) {
                this.leftIterations = 0;
                this.shouldDoLeftIterations = false;
                this.shouldDoRightIterations = true;
                if (leftIt.hasRestorePoint()) {
                    leftIt.restore();
                    leftIt.clearRestorePoint();
                }
            }
            return jtFromLeftIt;
        }

        /**
         * Advances the right side while the left tuple stays fixed at currentLeft.
         *
         * @return the joined tuple for the next matching right tuple, or null when right
         * iterations are not scheduled or the run of matches has ended
         */
        private JoinedTuple doRightIteration() {
            if (!shouldDoRightIterations) {
                return null;
            }
            JoinedTuple jtFromRightIt = null;
            if (rightIt.hasNext()) {
                Tuple l = rightIt.next();
                // same bookkeeping as on the left side: remember the first tuple of the run
                if (this.rightIterations == 0) {
                    this.backedUpRight = l;
                }
                if (comparator.compare(this.currentLeft, l) == 0) {
                    if (this.rightIterations == 0) {
                        rightIt.createRestorePoint();
                    }
                    this.rightIterations++;
                    jtFromRightIt = new JoinedTuple<>(l.getKey(), this.currentLeft.getValue(), l.getValue());
                }
            }
            /*
             if this is the end of right iteration(jtFromRightIt == null), stop the right
             iterations and restore right iterator. No further side iteration is scheduled
             here, the merge loop in makeNextJoinTuple() takes over.
            */
            if (jtFromRightIt == null) {
                this.rightIterations = 0;
                this.shouldDoRightIterations = false;
                if (rightIt.hasRestorePoint()) {
                    rightIt.restore();
                    rightIt.clearRestorePoint();
                }
            }
            return jtFromRightIt;
        }

        /**
         * Computes the joined tuple for the following next() call and stores it in
         * nextJoinTuple. Pending left iterations are tried first, then pending right
         * iterations, and finally both sides are advanced until a matching pair is found
         * or one of the sides has nothing more to offer.
         */
        private void makeNextJoinTuple() {
            nextJoinTuple = this.doLeftIteration();
            if (nextJoinTuple == null) {
                nextJoinTuple = this.doRightIteration();
            }
            while (nextJoinTuple == null && (this.backedUpLeft != null || leftIt.hasNext()) && (this.backedUpRight != null || rightIt.hasNext())) {
                this.currentLeft = this.backedUpLeft != null ? this.backedUpLeft : leftIt.next();
                // we used the backup, so setting to null
                this.backedUpLeft = null;
                this.currentRight = this.backedUpRight != null ? this.backedUpRight : rightIt.next();
                this.backedUpRight = null;
                // still we don't need left or right iterations at this point
                this.shouldDoLeftIterations = false;
                this.shouldDoRightIterations = false;
                // compare once and reuse the result for all three branches
                int order = comparator.compare(this.currentLeft, this.currentRight);
                if (order == 0) {
                    this.nextJoinTuple = new JoinedTuple<>(this.currentLeft.getKey(), this.currentLeft.getValue(), this.currentRight.getValue());
                    // schedule to run the left iteration next.
                    // Left iteration at the end will schedule right iteration
                    this.shouldDoLeftIterations = true;
                    break;
                } else if (order < 0) {
                    // left is behind: move left forward and keep the current right tuple.
                    // If left is exhausted backedUpLeft stays null and the loop ends.
                    if (leftIt.hasNext()) {
                        this.backedUpLeft = leftIt.next();
                    }
                    this.backedUpRight = this.currentRight;
                } else {
                    // right is behind: move right forward and keep the current left tuple
                    if (rightIt.hasNext()) {
                        this.backedUpRight = rightIt.next();
                    }
                    this.backedUpLeft = this.currentLeft;
                }
            }
        }

        {
            // start by creating the first join tuple
            this.makeNextJoinTuple();
        }

        @Override
        public boolean hasNext() {
            return nextJoinTuple != null;
        }

        @Override
        public JoinedTuple next() {
            // hand out the pre-computed tuple and immediately prepare the following one
            JoinedTuple current = nextJoinTuple;
            this.makeNextJoinTuple();
            return current;
        }
    };
}
Also used : RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple)

Example 47 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtilsTest method innerJoinWithDiskBasedListComparision.

/**
 * This test compares the results of in memory and disk based inner joins.
 * Purpose is to verify the accuracy of disk based inner join.
 *
 * <p>Both relations hold 100 tuples with random keys in [0, 10). Only the keys of the
 * joined tuples are compared, position by position, after the in-memory result has been
 * sorted by key; the joined values are not checked.
 */
@Test
public void innerJoinWithDiskBasedListComparision() {
    List<Tuple> left = new ArrayList<>();
    List<Tuple> right = new ArrayList<>();
    Random random = new Random();
    for (int i = 0; i < 100; i++) {
        left.add(Tuple.of(random.nextInt(10), random.nextInt()));
        right.add(Tuple.of(random.nextInt(10), random.nextInt()));
    }
    // feed the left relation into its own disk based merger
    FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-1-" + UUID.randomUUID(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
    for (Tuple tuple : left) {
        byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
        fsk1.add(tuple.getKey(), data, data.length);
        fsk1.run();
    }
    // same for the right relation
    FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-2-" + UUID.randomUUID(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
    for (Tuple tuple : right) {
        byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
        fsk2.add(tuple.getKey(), data, data.length);
        fsk2.run();
    }
    CommonThreadPool.init(Config.newBuilder().build());
    fsk1.switchToReading();
    fsk2.switchToReading();
    // result under test: join over the mergers' read iterators
    Iterator iterator = SortJoinUtils.joinWithCache((RestorableIterator) fsk1.readIterator(), (RestorableIterator) fsk2.readIterator(), new KeyComparatorWrapper((Comparator<Integer>) Integer::compare), CommunicationContext.JoinType.INNER, Config.newBuilder().build());
    // reference result: in-memory join, sorted by key so it can be compared positionally
    // NOTE(review): assumes the disk based iterator emits keys in ascending order - confirm
    List<Object> objects = SortJoinUtils.innerJoin(left, right, new KeyComparatorWrapper(Comparator.naturalOrder()));
    objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));
    int i = 0;
    while (iterator.hasNext()) {
        JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
        // fail with a readable message instead of an IndexOutOfBoundsException
        Assert.assertTrue("Disk based join produced more tuples than the in-memory join", i < objects.size());
        JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
        // JUnit expects (expected, actual): the in-memory result is the reference
        Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
    }
    // every tuple of the reference result must have been matched
    Assert.assertEquals(objects.size(), i);
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) CommonThreadPool(edu.iu.dsc.tws.api.util.CommonThreadPool) Random(java.util.Random) Test(org.junit.Test) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Config(edu.iu.dsc.tws.api.config.Config) UUID(java.util.UUID) MessageTypes(edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes) Logger(java.util.logging.Logger) ArrayList(java.util.ArrayList) List(java.util.List) Comparator(java.util.Comparator) Assert(org.junit.Assert) FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Comparator(java.util.Comparator) Random(java.util.Random) RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Test(org.junit.Test)

Example 48 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class SortJoinUtilsTest method leftOuterJoinComparision.

/**
 * This test compares the results of in memory and disk based left outer joins.
 * Purpose is to verify the accuracy of disk based left outer join.
 *
 * <p>Both relations hold 100 tuples with random keys in [0, 10). Only the keys of the
 * joined tuples are compared, position by position, after the in-memory result has been
 * sorted by key; the joined values are not checked.
 */
@Test
public void leftOuterJoinComparision() {
    List<Tuple> left = new ArrayList<>();
    List<Tuple> right = new ArrayList<>();
    Random random = new Random();
    for (int i = 0; i < 100; i++) {
        left.add(Tuple.of(random.nextInt(10), random.nextInt()));
        right.add(Tuple.of(random.nextInt(10), random.nextInt()));
    }
    // feed the left relation into its own disk based merger
    FSKeyedSortedMerger2 fsk1 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-1-" + UUID.randomUUID(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
    for (Tuple tuple : left) {
        byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
        fsk1.add(tuple.getKey(), data, data.length);
        fsk1.run();
    }
    // same for the right relation
    FSKeyedSortedMerger2 fsk2 = new FSKeyedSortedMerger2(10, 100, "/tmp", "op-2-" + UUID.randomUUID(), MessageTypes.INTEGER, MessageTypes.INTEGER, (Comparator<Integer>) Integer::compare, 0, false, 1);
    for (Tuple tuple : right) {
        byte[] data = MessageTypes.INTEGER.getDataPacker().packToByteArray((Integer) tuple.getValue());
        fsk2.add(tuple.getKey(), data, data.length);
        fsk2.run();
    }
    CommonThreadPool.init(Config.newBuilder().build());
    fsk1.switchToReading();
    fsk2.switchToReading();
    // result under test: left outer join over the mergers' read iterators
    Iterator iterator = SortJoinUtils.leftOuterJoin((RestorableIterator) fsk1.readIterator(), (RestorableIterator) fsk2.readIterator(), new KeyComparatorWrapper((Comparator<Integer>) Integer::compare));
    // reference result: in-memory join, sorted by key so it can be compared positionally
    // NOTE(review): assumes the disk based iterator emits keys in ascending order - confirm
    List<Object> objects = SortJoinUtils.leftOuterJoin(left, right, new KeyComparatorWrapper(Comparator.naturalOrder()));
    objects.sort(Comparator.comparingInt(o -> (Integer) ((JoinedTuple) o).getKey()));
    int i = 0;
    while (iterator.hasNext()) {
        JoinedTuple nextFromIt = (JoinedTuple) iterator.next();
        // fail with a readable message instead of an IndexOutOfBoundsException
        Assert.assertTrue("Disk based join produced more tuples than the in-memory join", i < objects.size());
        JoinedTuple nextFromList = (JoinedTuple) objects.get(i++);
        // JUnit expects (expected, actual): the in-memory result is the reference
        Assert.assertEquals(nextFromList.getKey(), nextFromIt.getKey());
    }
    // every tuple of the reference result must have been matched
    Assert.assertEquals(objects.size(), i);
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) CommonThreadPool(edu.iu.dsc.tws.api.util.CommonThreadPool) Random(java.util.Random) Test(org.junit.Test) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Config(edu.iu.dsc.tws.api.config.Config) UUID(java.util.UUID) MessageTypes(edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes) Logger(java.util.logging.Logger) ArrayList(java.util.ArrayList) List(java.util.List) Comparator(java.util.Comparator) Assert(org.junit.Assert) FSKeyedSortedMerger2(edu.iu.dsc.tws.comms.shuffle.FSKeyedSortedMerger2) ArrayList(java.util.ArrayList) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Comparator(java.util.Comparator) Random(java.util.Random) RestorableIterator(edu.iu.dsc.tws.comms.shuffle.RestorableIterator) Iterator(java.util.Iterator) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) JoinedTuple(edu.iu.dsc.tws.api.comms.structs.JoinedTuple) Test(org.junit.Test)

Example 49 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class HadoopTSet method execute.

/**
 * Builds and runs a batch TSet graph that reads text through Hadoop's
 * {@code TextInputFormat} and logs every record.
 *
 * <p>The Hadoop configuration is loaded from the HDFS config directory named in
 * {@code config}; the input directory is hard-coded to {@code /input4}.
 *
 * @param config job configuration, also used to locate the HDFS configuration
 * @param job the job description handed to the worker environment
 * @param workerController controller handed to the worker environment
 * @param persistentVolume persistent volume handed to the worker environment
 * @param volatileVolume volatile volume handed to the worker environment
 */
@Override
public void execute(Config config, JobAPI.Job job, IWorkerController workerController, IPersistentVolume persistentVolume, IVolatileVolume volatileVolume) {
    WorkerEnvironment workerEnv = WorkerEnvironment.init(config, job, workerController, persistentVolume, volatileVolume);
    BatchEnvironment tSetEnv = TSetEnvironment.initBatch(workerEnv);
    Configuration configuration = new Configuration();
    configuration.addResource(new Path(HdfsDataContext.getHdfsConfigDirectory(config)));
    configuration.set(TextInputFormat.INPUT_DIR, "/input4");
    // each (key, value) record is turned into a "key : value" string
    // NOTE(review): the literal 4 is presumably the source parallelism - confirm
    SourceTSet<String> source = tSetEnv.createHadoopSource(configuration, TextInputFormat.class, 4, new MapFunc<Tuple<LongWritable, Text>, String>() {

        @Override
        public String map(Tuple<LongWritable, Text> input) {
            return input.getKey().toString() + " : " + input.getValue().toString();
        }
    });
    // the sink drains the iterator it receives and logs every element
    SinkTSet<Iterator<String>> sink = source.direct().sink((SinkFunc<Iterator<String>>) value -> {
        while (value.hasNext()) {
            String next = value.next();
            LOG.info("Received value: " + next);
        }
        return true;
    });
    tSetEnv.run(sink);
}
Also used : Path(org.apache.hadoop.fs.Path) Twister2Job(edu.iu.dsc.tws.api.Twister2Job) HdfsDataContext(edu.iu.dsc.tws.data.utils.HdfsDataContext) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Text(org.apache.hadoop.io.Text) IPersistentVolume(edu.iu.dsc.tws.api.resource.IPersistentVolume) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) MapFunc(edu.iu.dsc.tws.api.tset.fn.MapFunc) LongWritable(org.apache.hadoop.io.LongWritable) JobConfig(edu.iu.dsc.tws.api.JobConfig) TextInputFormat(org.apache.hadoop.mapreduce.lib.input.TextInputFormat) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) Iterator(java.util.Iterator) IVolatileVolume(edu.iu.dsc.tws.api.resource.IVolatileVolume) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) SinkTSet(edu.iu.dsc.tws.tset.sets.batch.SinkTSet) JobAPI(edu.iu.dsc.tws.proto.system.job.JobAPI) Logger(java.util.logging.Logger) SinkFunc(edu.iu.dsc.tws.api.tset.fn.SinkFunc) Serializable(java.io.Serializable) Twister2Submitter(edu.iu.dsc.tws.rsched.job.Twister2Submitter) IWorker(edu.iu.dsc.tws.api.resource.IWorker) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) IWorkerController(edu.iu.dsc.tws.api.resource.IWorkerController) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) Configuration(org.apache.hadoop.conf.Configuration) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Text(org.apache.hadoop.io.Text) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) Iterator(java.util.Iterator) LongWritable(org.apache.hadoop.io.LongWritable) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Example 50 with Tuple

use of edu.iu.dsc.tws.api.comms.structs.Tuple in project twister2 by DSC-SPIDAL.

the class BranchingExample method execute.

/**
 * Wires up a small branching batch graph: one source is mapped into two keyed sets
 * (key = value modulo 2), the two are inner-joined, the join result is formatted as a
 * string, that string set is branched into a second, prefixed set, and finally both
 * branches are unioned and logged.
 *
 * @param workerEnv the worker environment used to initialise the batch environment
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    final int parallelism = 2;
    BatchEnvironment batchEnv = TSetEnvironment.initBatch(workerEnv);

    SourceTSet<Integer> source = dummySource(batchEnv, COUNT, parallelism)
        .setName("src0");

    // both sides of the join are derived from the same source
    KeyedTSet<Integer, Integer> leftSide = source
        .mapToTuple(value -> new Tuple<>(value % 2, value))
        .setName("left");
    KeyedTSet<Integer, Integer> rightSide = source
        .mapToTuple(value -> new Tuple<>(value % 2, value + 1))
        .setName("right");

    JoinTLink<Integer, Integer, Integer> joined = leftSide
        .join(rightSide, CommunicationContext.JoinType.INNER, Integer::compareTo)
        .setName("join");

    // render every joined tuple as "(key leftValue rightValue)"
    ComputeTSet<String> formatted = joined
        .map(joinedTuple -> "(" + joinedTuple.getKey() + " " + joinedTuple.getLeftValue() + " " + joinedTuple.getRightValue() + ")")
        .setName("map***");

    // second branch off the formatted set, each string gets a "###" prefix
    ComputeTSet<String> prefixed = formatted
        .direct()
        .map(text -> "###" + text)
        .setName("map@@");

    ComputeTSet<String> merged = formatted
        .union(prefixed)
        .setName("union");

    // kept as a lambda on purpose; LOG::info would bind the logger instance
    merged.direct().forEach(line -> LOG.info(line));
}
Also used : CommunicationContext(edu.iu.dsc.tws.api.comms.CommunicationContext) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple) ComputeTSet(edu.iu.dsc.tws.tset.sets.batch.ComputeTSet) SourceTSet(edu.iu.dsc.tws.tset.sets.batch.SourceTSet) ResourceAllocator(edu.iu.dsc.tws.rsched.core.ResourceAllocator) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) HashMap(java.util.HashMap) Config(edu.iu.dsc.tws.api.config.Config) Logger(java.util.logging.Logger) KeyedTSet(edu.iu.dsc.tws.tset.sets.batch.KeyedTSet) JobConfig(edu.iu.dsc.tws.api.JobConfig) WorkerEnvironment(edu.iu.dsc.tws.api.resource.WorkerEnvironment) TSetEnvironment(edu.iu.dsc.tws.tset.env.TSetEnvironment) JoinTLink(edu.iu.dsc.tws.tset.links.batch.JoinTLink) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) Tuple(edu.iu.dsc.tws.api.comms.structs.Tuple)

Aggregations

Tuple (edu.iu.dsc.tws.api.comms.structs.Tuple): 98, Iterator (java.util.Iterator): 38, List (java.util.List): 35, Logger (java.util.logging.Logger): 34, ArrayList (java.util.ArrayList): 29, Config (edu.iu.dsc.tws.api.config.Config): 27, WorkerEnvironment (edu.iu.dsc.tws.api.resource.WorkerEnvironment): 24, Test (org.junit.Test): 24, BatchEnvironment (edu.iu.dsc.tws.tset.env.BatchEnvironment): 18, InMessage (edu.iu.dsc.tws.comms.dfw.InMessage): 17, HashMap (java.util.HashMap): 16, TSetEnvironment (edu.iu.dsc.tws.tset.env.TSetEnvironment): 15, JobConfig (edu.iu.dsc.tws.api.JobConfig): 14, MessageTypes (edu.iu.dsc.tws.api.comms.messaging.types.MessageTypes): 14, JoinedTuple (edu.iu.dsc.tws.api.comms.structs.JoinedTuple): 14, ResourceAllocator (edu.iu.dsc.tws.rsched.core.ResourceAllocator): 14, SourceTSet (edu.iu.dsc.tws.tset.sets.batch.SourceTSet): 13, CommunicationContext (edu.iu.dsc.tws.api.comms.CommunicationContext): 11, MessageType (edu.iu.dsc.tws.api.comms.messaging.types.MessageType): 11, Comparator (java.util.Comparator): 11