Example 41 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

From the class Graph, the method mapVertices:

/**
 * Apply a function to the attribute of each vertex in the graph.
 *
 * @param mapper the map function to apply.
 * @return a new graph
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <NV> Graph<K, NV, EV> mapVertices(final MapFunction<Vertex<K, VV>, NV> mapper) {
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) vertices.getType()).getTypeAt(0);
    TypeInformation<NV> valueType;
    if (mapper instanceof ResultTypeQueryable) {
        valueType = ((ResultTypeQueryable) mapper).getProducedType();
    } else {
        valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, vertices.getType(), null);
    }
    TypeInformation<Vertex<K, NV>> returnType = (TypeInformation<Vertex<K, NV>>) new TupleTypeInfo(Vertex.class, keyType, valueType);
    return mapVertices(mapper, returnType);
}
Also used: ResultTypeQueryable (org.apache.flink.api.java.typeutils.ResultTypeQueryable), MapFunction (org.apache.flink.api.common.functions.MapFunction), FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction), TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)
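
For orientation, here is a minimal usage sketch (the sample data and local execution environment are hypothetical, not taken from the Flink sources) showing how the return type derived above lets callers invoke mapVertices without an explicit returns(...) hint:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.Vertex;
import org.apache.flink.types.NullValue;

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Edge<Long, Double>> edges = env.fromElements(
        new Edge<>(1L, 2L, 0.5),
        new Edge<>(2L, 3L, 0.25));
// Without a vertex value initializer, vertex values default to NullValue.
Graph<Long, NullValue, Double> graph = Graph.fromDataSet(edges, env);
// An anonymous class (rather than a lambda) keeps the generic parameters
// visible to the TypeExtractor, so no returns(...) hint is needed.
Graph<Long, String, Double> labeled = graph.mapVertices(
        new MapFunction<Vertex<Long, NullValue>, String>() {
            @Override
            public String map(Vertex<Long, NullValue> vertex) {
                return "vertex-" + vertex.getId();
            }
        });
labeled.getVertices().print();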

Example 42 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

From the class Graph, the method fromDataSet:

/**
 * Creates a graph from a DataSet of edges. Vertices are created automatically and their values
 * are set by applying the provided map function to the vertex IDs.
 *
 * @param edges a DataSet of edges.
 * @param vertexValueInitializer the mapper function that initializes the vertex values. It
 *     applies a map transformation to each vertex ID to produce the initial vertex value.
 * @param context the Flink execution environment.
 * @return the newly created graph.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromDataSet(DataSet<Edge<K, EV>> edges, final MapFunction<K, VV> vertexValueInitializer, ExecutionEnvironment context) {
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);
    TypeInformation<VV> valueType = TypeExtractor.createTypeInfo(MapFunction.class, vertexValueInitializer.getClass(), 1, keyType, null);
    @SuppressWarnings({ "unchecked", "rawtypes" }) TypeInformation<Vertex<K, VV>> returnType = (TypeInformation<Vertex<K, VV>>) new TupleTypeInfo(Vertex.class, keyType, valueType);
    DataSet<Vertex<K, VV>> vertices = edges.flatMap(new EmitSrcAndTargetAsTuple1<>()).name("Source and target IDs").distinct().name("IDs").map(new MapFunction<Tuple1<K>, Vertex<K, VV>>() {

        private Vertex<K, VV> output = new Vertex<>();

        public Vertex<K, VV> map(Tuple1<K> value) throws Exception {
            output.f0 = value.f0;
            output.f1 = vertexValueInitializer.map(value.f0);
            return output;
        }
    }).returns(returnType).withForwardedFields("f0").name("Initialize vertex values");
    return new Graph<>(vertices, edges, context);
}
Also used: MapFunction (org.apache.flink.api.common.functions.MapFunction), FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction), TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), Tuple1 (org.apache.flink.api.java.tuple.Tuple1)
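
A companion sketch along the same lines (again with illustrative data and a local environment): the anonymous MapFunction gives the TypeExtractor enough generic information to infer Vertex<Long, String> for the automatically created vertices:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Edge<Long, Double>> edges = env.fromElements(
        new Edge<>(1L, 2L, 0.5),
        new Edge<>(2L, 3L, 0.25));
// Each distinct vertex ID found in the edges is mapped to an initial
// String value; Vertex<Long, String> is inferred from this MapFunction.
Graph<Long, String, Double> graph = Graph.fromDataSet(
        edges,
        new MapFunction<Long, String>() {
            @Override
            public String map(Long id) {
                return "v" + id;
            }
        },
        env);
graph.getVertices().print();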

Example 43 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

From the class PythonBridgeUtils, the method getPickledBytesFromJavaObject:

public static Object getPickledBytesFromJavaObject(Object obj, TypeInformation<?> dataType) throws IOException {
    Pickler pickler = new Pickler();
    initialize();
    if (obj == null) {
        return new byte[0];
    } else {
        if (dataType instanceof SqlTimeTypeInfo) {
            SqlTimeTypeInfo<?> sqlTimeTypeInfo = SqlTimeTypeInfo.getInfoFor(dataType.getTypeClass());
            if (sqlTimeTypeInfo == DATE) {
                return pickler.dumps(((Date) obj).toLocalDate().toEpochDay());
            } else if (sqlTimeTypeInfo == TIME) {
                return pickler.dumps(((Time) obj).toLocalTime().toNanoOfDay() / 1000);
            }
        } else if (dataType instanceof RowTypeInfo || dataType instanceof TupleTypeInfo) {
            TypeInformation<?>[] fieldTypes = ((TupleTypeInfoBase<?>) dataType).getFieldTypes();
            int arity = dataType instanceof RowTypeInfo ? ((Row) obj).getArity() : ((Tuple) obj).getArity();
            List<Object> fieldBytes = new ArrayList<>(arity + 1);
            if (dataType instanceof RowTypeInfo) {
                fieldBytes.add(new byte[] { ((Row) obj).getKind().toByteValue() });
            }
            for (int i = 0; i < arity; i++) {
                Object field = dataType instanceof RowTypeInfo ? ((Row) obj).getField(i) : ((Tuple) obj).getField(i);
                fieldBytes.add(getPickledBytesFromJavaObject(field, fieldTypes[i]));
            }
            return fieldBytes;
        } else if (dataType instanceof BasicArrayTypeInfo || dataType instanceof PrimitiveArrayTypeInfo) {
            Object[] objects = (Object[]) obj;
            List<Object> serializedElements = new ArrayList<>(objects.length);
            TypeInformation<?> elementType = dataType instanceof BasicArrayTypeInfo ? ((BasicArrayTypeInfo<?, ?>) dataType).getComponentInfo() : ((PrimitiveArrayTypeInfo<?>) dataType).getComponentType();
            for (Object object : objects) {
                serializedElements.add(getPickledBytesFromJavaObject(object, elementType));
            }
            return pickler.dumps(serializedElements);
        } else if (dataType instanceof MapTypeInfo) {
            List<List<Object>> serializedMapKV = new ArrayList<>(2);
            Map<Object, Object> mapObj = (Map) obj;
            List<Object> keyBytesList = new ArrayList<>(mapObj.size());
            List<Object> valueBytesList = new ArrayList<>(mapObj.size());
            for (Map.Entry entry : mapObj.entrySet()) {
                keyBytesList.add(getPickledBytesFromJavaObject(entry.getKey(), ((MapTypeInfo) dataType).getKeyTypeInfo()));
                valueBytesList.add(getPickledBytesFromJavaObject(entry.getValue(), ((MapTypeInfo) dataType).getValueTypeInfo()));
            }
            serializedMapKV.add(keyBytesList);
            serializedMapKV.add(valueBytesList);
            return pickler.dumps(serializedMapKV);
        } else if (dataType instanceof ListTypeInfo) {
            List objects = (List) obj;
            List<Object> serializedElements = new ArrayList<>(objects.size());
            TypeInformation elementType = ((ListTypeInfo) dataType).getElementTypeInfo();
            for (Object object : objects) {
                serializedElements.add(getPickledBytesFromJavaObject(object, elementType));
            }
            return pickler.dumps(serializedElements);
        }
        if (dataType instanceof BasicTypeInfo && BasicTypeInfo.getInfoFor(dataType.getTypeClass()) == FLOAT_TYPE_INFO) {
            // Serialization of float type with pickler loses precision.
            return pickler.dumps(String.valueOf(obj));
        } else if (dataType instanceof PickledByteArrayTypeInfo || dataType instanceof BasicTypeInfo) {
            return pickler.dumps(obj);
        } else {
            // other typeinfos will use the corresponding serializer to serialize data.
            TypeSerializer serializer = dataType.createSerializer(null);
            ByteArrayOutputStreamWithPos baos = new ByteArrayOutputStreamWithPos();
            DataOutputViewStreamWrapper baosWrapper = new DataOutputViewStreamWrapper(baos);
            serializer.serialize(obj, baosWrapper);
            return pickler.dumps(baos.toByteArray());
        }
    }
}
Also used: PickledByteArrayTypeInfo (org.apache.flink.streaming.api.typeinfo.python.PickledByteArrayTypeInfo), ArrayList (java.util.ArrayList), Time (java.sql.Time), LocalDateTime (java.time.LocalDateTime), LocalTime (java.time.LocalTime), RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer), BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo), LinkedList (java.util.LinkedList), List (java.util.List), ByteArrayOutputStreamWithPos (org.apache.flink.core.memory.ByteArrayOutputStreamWithPos), Date (java.sql.Date), LocalDate (java.time.LocalDate), TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo), SqlTimeTypeInfo (org.apache.flink.api.common.typeinfo.SqlTimeTypeInfo), DataOutputViewStreamWrapper (org.apache.flink.core.memory.DataOutputViewStreamWrapper), ListTypeInfo (org.apache.flink.api.java.typeutils.ListTypeInfo), MapTypeInfo (org.apache.flink.api.java.typeutils.MapTypeInfo), PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo), Row (org.apache.flink.types.Row), Pickler (net.razorvine.pickle.Pickler), Map (java.util.Map), Tuple (org.apache.flink.api.java.tuple.Tuple), BasicArrayTypeInfo (org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo)
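
A hedged usage sketch of the Row and Tuple branches (the data values are illustrative; PythonBridgeUtils ships in the flink-python module, and the call throws IOException):

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.types.Row;
// PythonBridgeUtils import omitted: the class is part of the flink-python module.

// Row branch: the returned List carries one leading byte for the RowKind,
// followed by the pickled bytes of each field.
Row row = Row.of(1, "hello");
RowTypeInfo rowType =
        new RowTypeInfo(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
Object pickledRow = PythonBridgeUtils.getPickledBytesFromJavaObject(row, rowType);

// Tuple branch: the same field-by-field recursion, without the RowKind byte.
Tuple2<Long, String> tuple = Tuple2.of(42L, "answer");
TupleTypeInfo<Tuple2<Long, String>> tupleType =
        new TupleTypeInfo<>(BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
Object pickledTuple = PythonBridgeUtils.getPickledBytesFromJavaObject(tuple, tupleType);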

Example 44 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

From the class LargeRecordHandlerTest, the method testEmptyRecordHandler:

@Test
public void testEmptyRecordHandler() {
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 50;
    try (final IOManager ioMan = new IOManagerAsync()) {
        final MemoryManager memMan = MemoryManagerBuilder.newBuilder().setMemorySize(NUM_PAGES * PAGE_SIZE).setPageSize(PAGE_SIZE).build();
        final AbstractInvokable owner = new DummyInvokable();
        final List<MemorySegment> memory = memMan.allocatePages(owner, NUM_PAGES);
        final TupleTypeInfo<Tuple2<Long, String>> typeInfo = (TupleTypeInfo<Tuple2<Long, String>>) TypeInformation.of(new TypeHint<Tuple2<Long, String>>() {
        });
        final TypeSerializer<Tuple2<Long, String>> serializer = typeInfo.createSerializer(new ExecutionConfig());
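        // Comparator over key field 0, ascending; the third argument is the logical field offset.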
        final TypeComparator<Tuple2<Long, String>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        LargeRecordHandler<Tuple2<Long, String>> handler = new LargeRecordHandler<Tuple2<Long, String>>(serializer, comparator, ioMan, memMan, memory, owner, 128, owner.getExecutionConfig());
        assertFalse(handler.hasData());
        handler.close();
        assertFalse(handler.hasData());
        handler.close();
        try {
            handler.addRecord(new Tuple2<Long, String>(92L, "peter pepper"));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
        // expected
        }
        assertTrue(memMan.verifyEmpty());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), MemoryManager (org.apache.flink.runtime.memory.MemoryManager), AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable), MemorySegment (org.apache.flink.core.memory.MemorySegment), TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo), IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable), Test (org.junit.Test)

Example 45 with TupleTypeInfo

Use of org.apache.flink.api.java.typeutils.TupleTypeInfo in project flink by apache.

From the class LargeRecordHandlerTest, the method testRecordHandlerSingleKey:

@Test
public void testRecordHandlerSingleKey() {
    final int PAGE_SIZE = 4 * 1024;
    final int NUM_PAGES = 24;
    final int NUM_RECORDS = 25000;
    try (final IOManager ioMan = new IOManagerAsync()) {
        final MemoryManager memMan = MemoryManagerBuilder.newBuilder().setMemorySize(NUM_PAGES * PAGE_SIZE).setPageSize(PAGE_SIZE).build();
        final AbstractInvokable owner = new DummyInvokable();
        final List<MemorySegment> initialMemory = memMan.allocatePages(owner, 6);
        final List<MemorySegment> sortMemory = memMan.allocatePages(owner, NUM_PAGES - 6);
        final TupleTypeInfo<Tuple2<Long, String>> typeInfo = (TupleTypeInfo<Tuple2<Long, String>>) TypeInformation.of(new TypeHint<Tuple2<Long, String>>() {
        });
        final TypeSerializer<Tuple2<Long, String>> serializer = typeInfo.createSerializer(new ExecutionConfig());
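        // Comparator over key field 0, ascending; the third argument is the logical field offset.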
        final TypeComparator<Tuple2<Long, String>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        LargeRecordHandler<Tuple2<Long, String>> handler = new LargeRecordHandler<Tuple2<Long, String>>(serializer, comparator, ioMan, memMan, initialMemory, owner, 128, owner.getExecutionConfig());
        assertFalse(handler.hasData());
        // add the test data
        Random rnd = new Random();
        for (int i = 0; i < NUM_RECORDS; i++) {
            long val = rnd.nextLong();
            handler.addRecord(new Tuple2<Long, String>(val, String.valueOf(val)));
            assertTrue(handler.hasData());
        }
        MutableObjectIterator<Tuple2<Long, String>> sorted = handler.finishWriteAndSortKeys(sortMemory);
        try {
            handler.addRecord(new Tuple2<Long, String>(92L, "peter pepper"));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
        // expected
        }
        Tuple2<Long, String> previous = null;
        Tuple2<Long, String> next;
        while ((next = sorted.next(null)) != null) {
            // key and value must be equal
            assertTrue(next.f0.equals(Long.parseLong(next.f1)));
            // order must be correct
            if (previous != null) {
                assertTrue(previous.f0 <= next.f0);
            }
            previous = next;
        }
        handler.close();
        assertFalse(handler.hasData());
        handler.close();
        try {
            handler.addRecord(new Tuple2<Long, String>(92L, "peter pepper"));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
        // expected
        }
        assertTrue(memMan.verifyEmpty());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable), IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync), Random (java.util.Random), DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable), IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager), MemoryManager (org.apache.flink.runtime.memory.MemoryManager), MemorySegment (org.apache.flink.core.memory.MemorySegment), TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Test (org.junit.Test)

Aggregations

Usage counts across the examples on this page:

TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo): 52
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 32
Test (org.junit.Test): 32
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 21
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 14
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 12
StringValue (org.apache.flink.types.StringValue): 12
Random (java.util.Random): 11
ArrayList (java.util.ArrayList): 10
IOException (java.io.IOException): 8
IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager): 7
IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync): 7
MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 7
DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable): 7
IntValue (org.apache.flink.types.IntValue): 7
ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo): 6
List (java.util.List): 5
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 5
MemorySegment (org.apache.flink.core.memory.MemorySegment): 5
AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable): 5