Search in sources :

Example 1 with TField

use of edu.iu.dsc.tws.common.table.TField in project twister2 by DSC-SPIDAL.

the class PartitionExample method execute.

/**
 * Builds and runs a small row-based batch pipeline: a source that emits fixed
 * two-column rows, a random partitioning step, and a sink that logs every row
 * it receives.
 *
 * @param workerEnvironment the worker environment used to initialise the batch
 *                          TSet environment
 */
@Override
public void execute(WorkerEnvironment workerEnvironment) {
    BatchEnvironment env = TSetEnvironment.initBatch(workerEnvironment);

    // Row schema: an integer column "first" followed by a double column "second".
    List<TField> fieldList = new ArrayList<>();
    fieldList.add(new TField("first", MessageTypes.INTEGER));
    fieldList.add(new TField("second", MessageTypes.DOUBLE));

    // NOTE(review): the trailing 4 is presumably the source parallelism -- confirm
    // against createRowSource.
    RowSourceTSet src = env.createRowSource("row", new SourceFunc<Row>() {

        private int count = 0;

        @Override
        public boolean hasNext() {
            // Every call advances the counter, so this answers true for the
            // first 1000 calls and false afterwards.
            return count++ < 1000;
        }

        @Override
        public Row next() {
            // Every emitted row carries the same constant values.
            return new TwoRow(1, 4.1);
        }
    }, 4).withSchema(new RowSchema(fieldList));

    BatchRowTLink partition = src.partition(new PartitionFunc<Row>() {

        private List<Integer> targets;

        private Random random;

        private int c = 0;

        // Number of rows routed to each destination, keyed by destination id.
        private Map<Integer, Integer> counts = new HashMap<>();

        @Override
        public void prepare(Set<Integer> sources, Set<Integer> destinations) {
            targets = new ArrayList<>(destinations);
            random = new Random();
            for (int t : targets) {
                counts.put(t, 0);
            }
        }

        @Override
        public int partition(int sourceIndex, Row val) {
            // Pick a destination uniformly at random. The tally must be keyed by
            // the destination id (as seeded in prepare), not by the position in
            // the targets list: the two only coincide when ids are 0..n-1, and
            // otherwise the lookup returns null and unboxing fails.
            int target = targets.get(random.nextInt(targets.size()));
            counts.put(target, counts.get(target) + 1);
            c++;
            if (c == 1000) {
                // Log the distribution once, after the 1000th routed row.
                LOG.info("COUNTS " + counts);
            }
            return target;
        }
    }, 4, 0);

    partition.forEach(new ApplyFunc<Row>() {

        private TSetContext ctx;

        // Number of rows seen so far by this function instance.
        private int count;

        @Override
        public void prepare(TSetContext context) {
            ctx = context;
        }

        @Override
        public void apply(Row data) {
            LOG.info(ctx.getIndex() + " Data " + data.get(0) + ", " + data.get(1) + ", count " + count++);
        }
    });
}
Also used : RowSchema(edu.iu.dsc.tws.api.tset.schema.RowSchema) RowSourceTSet(edu.iu.dsc.tws.tset.sets.batch.row.RowSourceTSet) TField(edu.iu.dsc.tws.common.table.TField) HashMap(java.util.HashMap) BatchEnvironment(edu.iu.dsc.tws.tset.env.BatchEnvironment) ArrayList(java.util.ArrayList) SourceFunc(edu.iu.dsc.tws.api.tset.fn.SourceFunc) TSetContext(edu.iu.dsc.tws.api.tset.TSetContext) Random(java.util.Random) TwoRow(edu.iu.dsc.tws.common.table.TwoRow) BatchRowTLink(edu.iu.dsc.tws.api.tset.link.batch.BatchRowTLink) Row(edu.iu.dsc.tws.common.table.Row) TwoRow(edu.iu.dsc.tws.common.table.TwoRow)

Example 2 with TField

use of edu.iu.dsc.tws.common.table.TField in project twister2 by DSC-SPIDAL.

the class RowSchema method fromArrow.

/**
 * Creates a {@code RowSchema} from an Arrow schema by translating every Arrow
 * field into a {@code TField} with the same name and the matching message type.
 *
 * @param schema the Arrow schema to convert
 * @return a row schema with one {@code TField} per Arrow field, in the same order
 * @throws Twister2RuntimeException if a field's type matches none of the
 *                                  supported {@code ArrowTypes} constants
 */
public static RowSchema fromArrow(org.apache.arrow.vector.types.pojo.Schema schema) {
    List<Field> fields = schema.getFields();
    List<TField> tFields = new ArrayList<>(fields.size());
    for (Field f : fields) {
        TField tField;
        if (f.getFieldType().equals(ArrowTypes.INT_FIELD_TYPE)) {
            tField = new TField(f.getName(), MessageTypes.INTEGER);
        } else if (f.getFieldType().equals(ArrowTypes.LONG_FIELD_TYPE)) {
            tField = new TField(f.getName(), MessageTypes.LONG);
        } else if (f.getFieldType().equals(ArrowTypes.SHORT_FIELD_TYPE)) {
            tField = new TField(f.getName(), MessageTypes.SHORT);
        } else if (f.getFieldType().equals(ArrowTypes.FLOAT_FIELD_TYPE)) {
            tField = new TField(f.getName(), MessageTypes.FLOAT);
        } else if (f.getFieldType().equals(ArrowTypes.DOUBLE_FIELD_TYPE)) {
            tField = new TField(f.getName(), MessageTypes.DOUBLE);
        } else if (f.getFieldType().equals(ArrowTypes.STRING_FIELD_TYPE)) {
            tField = new TField(f.getName(), MessageTypes.STRING);
        } else if (f.getFieldType().equals(ArrowTypes.BINARY_FILED_TYPE)) {
            tField = new TField(f.getName(), MessageTypes.BYTE);
        } else {
            // Name the offending field and its Arrow type so the failure can be
            // diagnosed without a debugger.
            throw new Twister2RuntimeException(
                "Unknown type " + f.getType() + " for field " + f.getName());
        }
        tFields.add(tField);
    }
    return new RowSchema(tFields);
}
Also used : Field(org.apache.arrow.vector.types.pojo.Field) TField(edu.iu.dsc.tws.common.table.TField) Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) TField(edu.iu.dsc.tws.common.table.TField) ArrayList(java.util.ArrayList)

Example 3 with TField

use of edu.iu.dsc.tws.common.table.TField in project twister2 by DSC-SPIDAL.

the class RowSchema method toArrowSchema.

/**
 * Converts this row schema into an Arrow schema by translating every
 * {@code TField} in {@code types} into an Arrow {@code Field} with the same name.
 *
 * @return an Arrow schema with one field per {@code TField}, in iteration order
 * @throws Twister2RuntimeException if a field's message type is not one of the
 *                                  types handled below
 */
public org.apache.arrow.vector.types.pojo.Schema toArrowSchema() {
    List<Field> fields = new ArrayList<>();
    for (TField f : types) {
        // Choose the Arrow type first; the Field wrapper is the same for all of them.
        ArrowType arrowType;
        if (f.getType().equals(MessageTypes.INTEGER)) {
            arrowType = new ArrowType.Int(32, true);
        } else if (f.getType().equals(MessageTypes.LONG)) {
            arrowType = new ArrowType.Int(64, true);
        } else if (f.getType().equals(MessageTypes.SHORT)) {
            arrowType = new ArrowType.Int(16, true);
        } else if (f.getType().equals(MessageTypes.FLOAT)) {
            arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
        } else if (f.getType().equals(MessageTypes.DOUBLE)) {
            arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
        } else if (f.getType().equals(MessageTypes.STRING)) {
            // NOTE(review): STRING is emitted as Binary, the same as BYTE below --
            // confirm this is intended and round-trips through fromArrow.
            arrowType = new ArrowType.Binary();
        } else if (f.getType().equals(MessageTypes.BYTE)) {
            arrowType = new ArrowType.Binary();
        } else {
            // Name the offending field and its type so the failure can be
            // diagnosed without a debugger.
            throw new Twister2RuntimeException(
                "Unknown type " + f.getType() + " for field " + f.getName());
        }
        // Same FieldType/Field arguments as before: false flag, null third
        // argument, and null as the last Field argument.
        fields.add(new Field(f.getName(), new FieldType(false, arrowType, null), null));
    }
    return new org.apache.arrow.vector.types.pojo.Schema(fields);
}
Also used : Twister2RuntimeException(edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException) TField(edu.iu.dsc.tws.common.table.TField) ArrayList(java.util.ArrayList) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) FieldType(org.apache.arrow.vector.types.pojo.FieldType) Field(org.apache.arrow.vector.types.pojo.Field) TField(edu.iu.dsc.tws.common.table.TField)

Aggregations

TField (edu.iu.dsc.tws.common.table.TField)3 ArrayList (java.util.ArrayList)3 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)2 Field (org.apache.arrow.vector.types.pojo.Field)2 TSetContext (edu.iu.dsc.tws.api.tset.TSetContext)1 SourceFunc (edu.iu.dsc.tws.api.tset.fn.SourceFunc)1 BatchRowTLink (edu.iu.dsc.tws.api.tset.link.batch.BatchRowTLink)1 RowSchema (edu.iu.dsc.tws.api.tset.schema.RowSchema)1 Row (edu.iu.dsc.tws.common.table.Row)1 TwoRow (edu.iu.dsc.tws.common.table.TwoRow)1 BatchEnvironment (edu.iu.dsc.tws.tset.env.BatchEnvironment)1 RowSourceTSet (edu.iu.dsc.tws.tset.sets.batch.row.RowSourceTSet)1 HashMap (java.util.HashMap)1 Random (java.util.Random)1 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)1 FieldType (org.apache.arrow.vector.types.pojo.FieldType)1