Search in sources :

Example 16 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class ExternalSortLargeRecordsITCase method testSortWithLongRecordsOnly.

// --------------------------------------------------------------------------------------------
/**
 * Feeds a small, deterministically generated stream of {@code Tuple2<Long, SomeMaybeLongValue>}
 * records through a {@code UnilateralSortMerger} and verifies the sorted output.
 *
 * <p>The comparator is created on field 0 with the order flag {@code false}; the verification
 * loop therefore expects each key to be less than or equal to the key of the record before it.
 * Each record must also still carry a value whose {@code val()} matches the int-truncated key,
 * and the iterator must be exhausted after {@code NUM_RECORDS} records.
 */
@Test
public void testSortWithLongRecordsOnly() {
    try {
        final int NUM_RECORDS = 10;
        final TypeInformation<?>[] types = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeMaybeLongValue>(SomeMaybeLongValue.class) };
        final TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>> typeInfo = new TupleTypeInfo<Tuple2<Long, SomeMaybeLongValue>>(types);
        final TypeSerializer<Tuple2<Long, SomeMaybeLongValue>> serializer = typeInfo.createSerializer(new ExecutionConfig());
        final TypeComparator<Tuple2<Long, SomeMaybeLongValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { false }, 0, new ExecutionConfig());
        // Source of test records: a fixed seed keeps the generated keys reproducible across runs.
        MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> source = new MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>>() {

            private final Random rnd = new Random(457821643089756298L);

            private int num = 0;

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next(Tuple2<Long, SomeMaybeLongValue> reuse) {
                // The reuse object is ignored; a fresh tuple is produced for every record.
                return next();
            }

            @Override
            public Tuple2<Long, SomeMaybeLongValue> next() {
                if (num++ < NUM_RECORDS) {
                    long val = rnd.nextLong();
                    return new Tuple2<Long, SomeMaybeLongValue>(val, new SomeMaybeLongValue((int) val));
                } else {
                    // null signals the end of the input
                    return null;
                }
            }
        };
        @SuppressWarnings("unchecked") Sorter<Tuple2<Long, SomeMaybeLongValue>> sorter = new UnilateralSortMerger<Tuple2<Long, SomeMaybeLongValue>>(this.memoryManager, this.ioManager, source, this.parentTask, new RuntimeSerializerFactory<Tuple2<Long, SomeMaybeLongValue>>(serializer, (Class<Tuple2<Long, SomeMaybeLongValue>>) (Class<?>) Tuple2.class), comparator, 1.0, 1, 128, 0.7f, true, /* use large record handler */
        false);
        // check order
        MutableObjectIterator<Tuple2<Long, SomeMaybeLongValue>> iterator = sorter.getIterator();
        Tuple2<Long, SomeMaybeLongValue> val = serializer.createInstance();
        long prevKey = Long.MAX_VALUE;
        for (int i = 0; i < NUM_RECORDS; i++) {
            val = iterator.next(val);
            assertTrue(val.f0 <= prevKey);
            assertTrue(val.f0.intValue() == val.f1.val());
            // Remember this key so the next record is compared against it; without this update
            // the order assertion would only ever compare against Long.MAX_VALUE and could not fail.
            prevKey = val.f0;
        }
        assertNull(iterator.next(val));
        sorter.close();
        testSuccess = true;
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Random(java.util.Random) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) Test(org.junit.Test)

Example 17 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class LargeRecordHandlerITCase method fileTest.

/**
 * Round-trips serialized {@code Tuple3<Long, SomeVeryLongValue, Byte>} records through a file
 * channel.
 *
 * <p>The test records the write offset before each record is serialized, checks that the
 * offsets are strictly increasing, then seeks back to every recorded offset and verifies that
 * the deserialized record's fields are consistent with its key.
 */
@Test
public void fileTest() {
    final IOManager io = new IOManagerAsync();
    final int pageSize = 4 * 1024;
    final int pageCount = 4;
    final int recordCount = 10;
    FileIOChannel.ID channelId = null;
    try {
        final MemoryManager pool = new MemoryManager(pageCount * pageSize, 1, pageSize, MemoryType.HEAP, true);
        final AbstractInvokable task = new DummyInvokable();
        final List<MemorySegment> pages = pool.allocatePages(task, pageCount);

        final TypeInformation<?>[] fieldTypes = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeVeryLongValue>(SomeVeryLongValue.class), BasicTypeInfo.BYTE_TYPE_INFO };
        final TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>> tupleType = new TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>>(fieldTypes);
        final TypeSerializer<Tuple3<Long, SomeVeryLongValue, Byte>> tupleSerializer = tupleType.createSerializer(new ExecutionConfig());

        channelId = io.createChannel();
        FileChannelOutputView writer = new FileChannelOutputView(io.createBlockChannelWriter(channelId), pool, pages, pageSize);

        // write phase: remember where each record starts
        Random random = new Random();
        List<Long> recordOffsets = new ArrayList<Long>();
        for (int written = 0; written < recordCount; written++) {
            recordOffsets.add(writer.getWriteOffset());
            long key = random.nextLong();
            Tuple3<Long, SomeVeryLongValue, Byte> record = new Tuple3<Long, SomeVeryLongValue, Byte>(key, new SomeVeryLongValue((int) key), (byte) key);
            tupleSerializer.serialize(record, writer);
        }
        writer.close();

        // every record must start strictly after its predecessor
        for (int idx = 1; idx < recordOffsets.size(); idx++) {
            long current = recordOffsets.get(idx);
            long preceding = recordOffsets.get(idx - 1);
            assertTrue(current > preceding);
        }

        // read phase: jump to each recorded offset and check the record found there
        pool.allocatePages(task, pages, pageCount);
        SeekableFileChannelInputView reader = new SeekableFileChannelInputView(io, channelId, pool, pages, writer.getBytesInLatestSegment());
        for (Long offset : recordOffsets) {
            reader.seek(offset);
            Tuple3<Long, SomeVeryLongValue, Byte> record = tupleSerializer.deserialize(reader);
            // key and value must be equal
            assertTrue(record.f0.intValue() == record.f1.val());
            assertTrue(record.f0.byteValue() == record.f2);
        }
        reader.closeAndDelete();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (channelId != null) {
            try {
                io.deleteChannel(channelId);
            } catch (IOException ignored) {
                // best-effort cleanup of the channel
            }
        }
        io.shutdown();
    }
}
Also used : ArrayList(java.util.ArrayList) FileIOChannel(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) Random(java.util.Random) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) SeekableFileChannelInputView(org.apache.flink.runtime.io.disk.SeekableFileChannelInputView) FileChannelOutputView(org.apache.flink.runtime.io.disk.FileChannelOutputView) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) IOException(java.io.IOException) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) MemorySegment(org.apache.flink.core.memory.MemorySegment) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 18 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class LargeRecordHandlerITCase method testRecordHandlerCompositeKey.

/**
 * Exercises {@code LargeRecordHandler} with a composite sort key made of tuple fields 2 and 0.
 *
 * <p>Records are added, the handler is switched to its sorted-read phase, and the returned
 * iterator is checked for field consistency and for non-decreasing order on field 2, then on
 * field 0 among records with equal field 2. The test also checks that adding records after
 * the write phase has finished, or after the handler is closed, raises
 * {@code IllegalStateException}, and that the memory manager reports empty at the end.
 */
@Test
public void testRecordHandlerCompositeKey() {
    final IOManager io = new IOManagerAsync();
    final int pageSize = 4 * 1024;
    final int pageCount = 1000;
    final int recordCount = 10;
    try {
        final MemoryManager pool = new MemoryManager(pageCount * pageSize, 1, pageSize, MemoryType.HEAP, true);
        final AbstractInvokable task = new DummyInvokable();
        final List<MemorySegment> handlerPages = pool.allocatePages(task, 6);
        final List<MemorySegment> sortPages = pool.allocatePages(task, pageCount - 6);

        final TypeInformation<?>[] fieldTypes = new TypeInformation<?>[] { BasicTypeInfo.LONG_TYPE_INFO, new ValueTypeInfo<SomeVeryLongValue>(SomeVeryLongValue.class), BasicTypeInfo.BYTE_TYPE_INFO };
        final TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>> tupleType = new TupleTypeInfo<Tuple3<Long, SomeVeryLongValue, Byte>>(fieldTypes);
        final TypeSerializer<Tuple3<Long, SomeVeryLongValue, Byte>> tupleSerializer = tupleType.createSerializer(new ExecutionConfig());
        final TypeComparator<Tuple3<Long, SomeVeryLongValue, Byte>> keyComparator = tupleType.createComparator(new int[] { 2, 0 }, new boolean[] { true, true }, 0, new ExecutionConfig());

        LargeRecordHandler<Tuple3<Long, SomeVeryLongValue, Byte>> recordHandler = new LargeRecordHandler<Tuple3<Long, SomeVeryLongValue, Byte>>(tupleSerializer, keyComparator, io, pool, handlerPages, task, 128);
        assertFalse(recordHandler.hasData());

        // add the test data
        Random random = new Random();
        for (int added = 0; added < recordCount; added++) {
            long key = random.nextLong();
            Tuple3<Long, SomeVeryLongValue, Byte> record = new Tuple3<Long, SomeVeryLongValue, Byte>(key, new SomeVeryLongValue((int) key), (byte) key);
            recordHandler.addRecord(record);
            assertTrue(recordHandler.hasData());
        }

        MutableObjectIterator<Tuple3<Long, SomeVeryLongValue, Byte>> sortedRecords = recordHandler.finishWriteAndSortKeys(sortPages);

        // once writing has finished, further records must be rejected
        try {
            recordHandler.addRecord(new Tuple3<Long, SomeVeryLongValue, Byte>(92L, null, (byte) 1));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
            // expected
        }

        Tuple3<Long, SomeVeryLongValue, Byte> last = null;
        Tuple3<Long, SomeVeryLongValue, Byte> current;
        for (current = sortedRecords.next(null); current != null; current = sortedRecords.next(null)) {
            // key and value must be equal
            assertTrue(current.f0.intValue() == current.f1.val());
            assertTrue(current.f0.byteValue() == current.f2);
            // order must be correct
            if (last != null) {
                assertTrue(last.f2 <= current.f2);
                assertTrue(last.f2.byteValue() != current.f2.byteValue() || last.f0 <= current.f0);
            }
            last = current;
        }

        recordHandler.close();
        assertFalse(recordHandler.hasData());
        // closing a second time is part of the test
        recordHandler.close();

        // a closed handler must reject records as well
        try {
            recordHandler.addRecord(new Tuple3<Long, SomeVeryLongValue, Byte>(92L, null, (byte) 1));
            fail("should throw an exception");
        } catch (IllegalStateException e) {
            // expected
        }

        assertTrue(pool.verifyEmpty());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        io.shutdown();
    }
}
Also used : ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) Random(java.util.Random) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) ValueTypeInfo(org.apache.flink.api.java.typeutils.ValueTypeInfo) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) MemorySegment(org.apache.flink.core.memory.MemorySegment) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) IOException(java.io.IOException) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 19 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class HBaseRowInputFormat method getProducedType.

/**
 * Describes the rows produced by this input format.
 *
 * <p>The result is a {@code RowTypeInfo} with one field per family name reported by the
 * schema; each of those fields is itself a {@code RowTypeInfo} built from that family's
 * qualifier types and qualifier names.
 *
 * @return the nested row type, with the family names as top-level field names
 */
@Override
public TypeInformation<Row> getProducedType() {
    final String[] families = schema.getFamilyNames();
    final TypeInformation<?>[] familyTypes = new TypeInformation[families.length];
    for (int idx = 0; idx < families.length; idx++) {
        final String family = families[idx];
        // one nested row per family, made of that family's qualifiers
        familyTypes[idx] = new RowTypeInfo(schema.getQualifierTypes(family), schema.getQualifierNames(family));
    }
    return new RowTypeInfo(familyTypes, families);
}
Also used : RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation)

Example 20 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class HCatInputFormatBase method asFlinkTuples.

/**
 * Switches this InputFormat to return Flink tuples instead of
 * {@link org.apache.hive.hcatalog.data.HCatRecord}.
 *
 * <p>Note: Flink tuples might only support a limited number of fields (depending on the API).
 * The field names and field types are stored by their position in the output schema, and the
 * result type is set to a tuple type over those field types.
 *
 * @return This InputFormat.
 * @throws org.apache.hive.hcatalog.common.HCatException declared by this method; presumably
 *         raised by the schema lookups (TODO confirm)
 * @throws IllegalArgumentException if the output schema has more fields than
 *         {@code getMaxFlinkTupleSize()} allows
 */
public HCatInputFormatBase<T> asFlinkTuples() throws HCatException {
    // build type information
    final int fieldCount = outputSchema.getFields().size();
    if (this.getMaxFlinkTupleSize() < fieldCount) {
        throw new IllegalArgumentException("Only up to " + this.getMaxFlinkTupleSize() + " fields can be returned as Flink tuples.");
    }

    final TypeInformation[] typesByPosition = new TypeInformation[fieldCount];
    fieldNames = new String[fieldCount];
    for (String name : outputSchema.getFieldNames()) {
        final HCatFieldSchema fieldSchema = outputSchema.get(name);
        final int position = outputSchema.getPosition(name);
        // both arrays are indexed by the field's position in the schema
        typesByPosition[position] = getFieldType(fieldSchema);
        fieldNames[position] = name;
    }

    this.resultType = new TupleTypeInfo(typesByPosition);
    return this;
}
Also used : TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Aggregations

TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation) 51, Test (org.junit.Test) 28, Row (org.apache.flink.types.Row) 21, Configuration (org.apache.flink.configuration.Configuration) 20, FileInputSplit (org.apache.flink.core.fs.FileInputSplit) 20, TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo) 10, ArrayList (java.util.ArrayList) 9, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 8, CompositeType (org.apache.flink.api.common.typeutils.CompositeType) 8, IOException (java.io.IOException) 7, Type (java.lang.reflect.Type) 7, GenericArrayType (java.lang.reflect.GenericArrayType) 6, ParameterizedType (java.lang.reflect.ParameterizedType) 6, Random (java.util.Random) 6, InvalidTypesException (org.apache.flink.api.common.functions.InvalidTypesException) 6, TypeExtractionUtils.isClassType (org.apache.flink.api.java.typeutils.TypeExtractionUtils.isClassType) 6, ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo) 6, Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 5, TypeVariable (java.lang.reflect.TypeVariable) 4, MutableObjectIterator (org.apache.flink.util.MutableObjectIterator) 4