Example 81 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class ReusingKeyGroupedIteratorTest method setup.

@Before
public void setup() {
    final ArrayList<IntStringPair> source = new ArrayList<IntStringPair>();
    // add elements to the source
    source.add(new IntStringPair(new IntValue(1), new StringValue("A")));
    source.add(new IntStringPair(new IntValue(2), new StringValue("B")));
    source.add(new IntStringPair(new IntValue(3), new StringValue("C")));
    source.add(new IntStringPair(new IntValue(3), new StringValue("D")));
    source.add(new IntStringPair(new IntValue(4), new StringValue("E")));
    source.add(new IntStringPair(new IntValue(4), new StringValue("F")));
    source.add(new IntStringPair(new IntValue(4), new StringValue("G")));
    source.add(new IntStringPair(new IntValue(5), new StringValue("H")));
    source.add(new IntStringPair(new IntValue(5), new StringValue("I")));
    source.add(new IntStringPair(new IntValue(5), new StringValue("J")));
    source.add(new IntStringPair(new IntValue(5), new StringValue("K")));
    source.add(new IntStringPair(new IntValue(5), new StringValue("L")));
    this.sourceIter = new MutableObjectIterator<Record>() {

        final Iterator<IntStringPair> it = source.iterator();

        @Override
        public Record next(Record reuse) throws IOException {
            if (it.hasNext()) {
                IntStringPair pair = it.next();
                reuse.setField(0, pair.getInteger());
                reuse.setField(1, pair.getString());
                return reuse;
            } else {
                return null;
            }
        }

        @Override
        public Record next() throws IOException {
            if (it.hasNext()) {
                IntStringPair pair = it.next();
                Record result = new Record(2);
                result.setField(0, pair.getInteger());
                result.setField(1, pair.getString());
                return result;
            } else {
                return null;
            }
        }
    };
    final RecordSerializer serializer = RecordSerializer.get();
    @SuppressWarnings("unchecked") final RecordComparator comparator = new RecordComparator(new int[] { 0 }, new Class[] { IntValue.class });
    this.psi = new ReusingKeyGroupedIterator<Record>(this.sourceIter, serializer, comparator);
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) RecordComparator(org.apache.flink.runtime.testutils.recordutils.RecordComparator) Record(org.apache.flink.types.Record) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) RecordSerializer(org.apache.flink.runtime.testutils.recordutils.RecordSerializer) Before(org.junit.Before)
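
For orientation, a minimal sketch (not part of the original test) of how the iterator assembled in this setup is typically consumed: nextKey() advances to the next key group, getCurrent() exposes a record of that group, and getValues() drains the group's records. The helper name printGroups and the printing are illustrative assumptions; field indices 0 (key) and 1 (value) match the source data built above.

// Hypothetical consumption loop for the ReusingKeyGroupedIterator built in setup().
private static void printGroups(ReusingKeyGroupedIterator<Record> psi) throws IOException {
    // nextKey() positions the iterator on the next key group (keys 1..5 in the source above)
    while (psi.nextKey()) {
        int key = psi.getCurrent().getField(0, IntValue.class).getValue();
        // drain the current group's values; same access pattern as the assertions in the tests
        while (psi.getValues().hasNext()) {
            String value = psi.getValues().next().getField(1, StringValue.class).getValue();
            System.out.println(key + " -> " + value);
        }
    }
}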

Example 82 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class ReusingKeyGroupedIteratorTest method testMixedProgress.

@Test
public void testMixedProgress() throws Exception {
    try {
        // Progression only via nextKey() and hasNext() - Key 1, Value A
        Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
        Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
        Assert.assertTrue(hasIterator(this.psi.getValues()));
        Assert.assertFalse(hasIterator(this.psi.getValues()));
        // Progression only through nextKey() - Key 2, Value B
        Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
        Assert.assertTrue(hasIterator(this.psi.getValues()));
        Assert.assertFalse(hasIterator(this.psi.getValues()));
        // Progression first through hasNext() and next(), then through hasNext() - Key 3,
        // Values C, D
        Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
        Assert.assertTrue(hasIterator(this.psi.getValues()));
        Assert.assertFalse(hasIterator(this.psi.getValues()));
        Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
        Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(3))));
        Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 3, this.psi.getCurrent().getField(0, IntValue.class).getValue());
        Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("C"), this.psi.getValues().next().getField(1, StringValue.class));
        Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
        Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(3))));
        Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 3, this.psi.getCurrent().getField(0, IntValue.class).getValue());
        // Progression first via next() only, then via hasNext() only - Key 4, Values E, F, G
        Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
        Assert.assertTrue(hasIterator(this.psi.getValues()));
        Assert.assertFalse(hasIterator(this.psi.getValues()));
        Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("E"), this.psi.getValues().next().getField(1, StringValue.class));
        Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
        // Key 5, Values H, I, J, K, L
        Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
        Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("H"), this.psi.getValues().next().getField(1, StringValue.class));
        Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
        Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(5))));
        Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 5, this.psi.getCurrent().getField(0, IntValue.class).getValue());
        Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("I"), this.psi.getValues().next().getField(1, StringValue.class));
        Assert.assertTrue(hasIterator(this.psi.getValues()));
        Assert.assertFalse(hasIterator(this.psi.getValues()));
        Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
        // end
        Assert.assertFalse("KeyGroupedIterator must not have another key.", this.psi.nextKey());
        Assert.assertFalse("KeyGroupedIterator must not have another key.", this.psi.nextKey());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail("The test encountered an unexpected exception.");
    }
}
Also used : Record(org.apache.flink.types.Record) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) IOException(java.io.IOException) TraversableOnceException(org.apache.flink.util.TraversableOnceException) NoSuchElementException(java.util.NoSuchElementException) Test(org.junit.Test)
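
The hasIterator(...) helper asserted against above is not included in this snippet. A plausible sketch, assuming it only probes whether the current group can still hand out an iterator; a group's values are traversable once, so the second probe per group is expected to fail with the TraversableOnceException listed in the imports.

// Assumed shape of the hasIterator(...) helper: true while the Iterable still
// yields an iterator, false once the traversable-once contract rejects the request.
private static boolean hasIterator(Iterable<?> values) {
    try {
        values.iterator();
        return true;
    } catch (TraversableOnceException e) {
        return false;
    }
}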

Example 83 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class GroupReduceDriverTest method testAllReduceDriverMutable.

@Test
public void testAllReduceDriverMutable() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> context = new TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>();
        List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData();
        TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0));
        MutableObjectIterator<Tuple2<StringValue, IntValue>> input = new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>(data.iterator(), typeInfo.createSerializer(new ExecutionConfig()));
        TypeComparator<Tuple2<StringValue, IntValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        GatheringCollector<Tuple2<StringValue, IntValue>> result = new GatheringCollector<Tuple2<StringValue, IntValue>>(typeInfo.createSerializer(new ExecutionConfig()));
        context.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
        context.setComparator1(comparator);
        context.setCollector(result);
        context.setUdf(new ConcatSumMutableReducer());
        GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> driver = new GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>();
        driver.setup(context);
        driver.prepare();
        driver.run();
        Object[] res = result.getList().toArray();
        Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray();
        DriverTestData.compareTupleArrays(expected, res);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceDriver(org.apache.flink.runtime.operators.GroupReduceDriver) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) RegularToMutableObjectIterator(org.apache.flink.runtime.util.RegularToMutableObjectIterator) Test(org.junit.Test)
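
The ConcatSumMutableReducer supplied via setUdf(...) is defined in the test utilities and not shown here. A hypothetical reducer in the same spirit, assuming from its name that it concatenates the StringValue fields and sums the IntValue fields of each sorted group; the class name below and the allocation of a fresh output tuple are illustrative simplifications of the mutable-object variant.

// Hypothetical group reducer: concatenates strings and sums ints per key group.
public static class ConcatSumReducerSketch
        implements GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> {

    @Override
    public void reduce(Iterable<Tuple2<StringValue, IntValue>> values,
            Collector<Tuple2<StringValue, IntValue>> out) {
        StringBuilder concat = new StringBuilder();
        int sum = 0;
        for (Tuple2<StringValue, IntValue> t : values) {
            concat.append(t.f0.getValue());
            sum += t.f1.getValue();
        }
        out.collect(new Tuple2<>(new StringValue(concat.toString()), new IntValue(sum)));
    }
}

With DriverStrategy.SORTED_GROUP_REDUCE and the comparator on field 0, the driver invokes reduce(...) once per distinct key of the already sorted input, which is what the test compares against createReduceMutableDataGroupedResult().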

Example 84 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class AccumulatorITCase method postSubmit.

@Override
protected void postSubmit() throws Exception {
    compareResultsByLinesInMemory(EXPECTED, resultPath);
    // Test accumulator results
    System.out.println("Accumulator results:");
    JobExecutionResult res = this.result;
    System.out.println(AccumulatorHelper.getResultsFormatted(res.getAllAccumulatorResults()));
    Assert.assertEquals(Integer.valueOf(3), res.getAccumulatorResult("num-lines"));
    Assert.assertEquals(Integer.valueOf(3), res.getIntCounterResult("num-lines"));
    Assert.assertEquals(Double.valueOf(getParallelism()), res.getAccumulatorResult("open-close-counter"));
    // Test histogram (words per line distribution)
    Map<Integer, Integer> dist = new HashMap<>();
    dist.put(1, 1);
    dist.put(2, 1);
    dist.put(3, 1);
    Assert.assertEquals(dist, res.getAccumulatorResult("words-per-line"));
    // Test distinct words (custom accumulator)
    Set<StringValue> distinctWords = new HashSet<>();
    distinctWords.add(new StringValue("one"));
    distinctWords.add(new StringValue("two"));
    distinctWords.add(new StringValue("three"));
    Assert.assertEquals(distinctWords, res.getAccumulatorResult("distinct-words"));
}
Also used : JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) HashMap(java.util.HashMap) StringValue(org.apache.flink.types.StringValue) HashSet(java.util.HashSet)
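
The "distinct-words" result checked here is produced by a custom accumulator registered on the job side of AccumulatorITCase, which is not part of this snippet. A hedged sketch of such an accumulator against Flink's Accumulator contract; the class name is an assumption, and the actual test may implement it differently. It would typically be registered in a rich function via getRuntimeContext().addAccumulator("distinct-words", ...).

// Hypothetical custom accumulator: collects distinct StringValue words.
public static class DistinctWordsAccumulator implements Accumulator<StringValue, HashSet<StringValue>> {

    private HashSet<StringValue> words = new HashSet<>();

    @Override
    public void add(StringValue value) {
        // copy the value: StringValue is mutable and may be reused by the caller
        words.add(new StringValue(value.getValue()));
    }

    @Override
    public HashSet<StringValue> getLocalValue() {
        return words;
    }

    @Override
    public void resetLocal() {
        words.clear();
    }

    @Override
    public void merge(Accumulator<StringValue, HashSet<StringValue>> other) {
        words.addAll(other.getLocalValue());
    }

    @Override
    public Accumulator<StringValue, HashSet<StringValue>> clone() {
        DistinctWordsAccumulator copy = new DistinctWordsAccumulator();
        copy.words = new HashSet<>(this.words);
        return copy;
    }
}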

Example 85 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class MassiveStringValueSorting method testStringValueSorting.

public void testStringValueSorting() {
    File input = null;
    File sorted = null;
    try {
        // the source file
        input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // the sorted file
        sorted = File.createTempFile("sorted_strings", "txt");
        String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
        Process p = null;
        try {
            p = Runtime.getRuntime().exec(command);
            int retCode = p.waitFor();
            if (retCode != 0) {
                throw new Exception("Command failed with return code " + retCode);
            }
            p = null;
        } finally {
            if (p != null) {
                p.destroy();
            }
        }
        // sort the data
        Sorter<StringValue> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        MemoryManager mm = null;
        try (IOManager ioMan = new IOManagerAsync()) {
            mm = MemoryManagerBuilder.newBuilder().setMemorySize(1024 * 1024).build();
            TypeSerializer<StringValue> serializer = new CopyableValueSerializer<StringValue>(StringValue.class);
            TypeComparator<StringValue> comparator = new CopyableValueComparator<StringValue>(true, StringValue.class);
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<StringValue> inputIterator = new StringValueReaderMutableObjectIterator(reader);
            sorter = ExternalSorter.newBuilder(mm, new DummyInvokable(), serializer, comparator).maxNumFileHandles(128).enableSpilling(ioMan, 0.8f).memoryFraction(1.0).objectReuse(true).largeRecords(true).build(inputIterator);
            MutableObjectIterator<StringValue> sortedData = sorter.getIterator();
            reader.close();
            // verify
            verifyReader = new BufferedReader(new FileReader(sorted));
            String nextVerify;
            StringValue nextFromFlinkSort = new StringValue();
            while ((nextVerify = verifyReader.readLine()) != null) {
                nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
                Assert.assertNotNull(nextFromFlinkSort);
                Assert.assertEquals(nextVerify, nextFromFlinkSort.getValue());
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (verifyReader != null) {
                verifyReader.close();
            }
            if (sorter != null) {
                sorter.close();
            }
            if (mm != null) {
                mm.shutdown();
            }
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        if (input != null) {
            // noinspection ResultOfMethodCallIgnored
            input.delete();
        }
        if (sorted != null) {
            // noinspection ResultOfMethodCallIgnored
            sorted.delete();
        }
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) CopyableValueSerializer(org.apache.flink.api.java.typeutils.runtime.CopyableValueSerializer) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) IOException(java.io.IOException) CopyableValueComparator(org.apache.flink.api.java.typeutils.runtime.CopyableValueComparator) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringValue(org.apache.flink.types.StringValue) File(java.io.File)
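
The StringValueReaderMutableObjectIterator used to feed the sorter is a private helper of the test and not shown. A plausible sketch, assuming it wraps the BufferedReader and refills the reused StringValue line by line, which is what lets the objectReuse(true) path above avoid per-record allocations on the Flink side.

// Assumed shape of the line-reading iterator: mirrors the MutableObjectIterator
// contract shown in Example 81 (a reusing next(reuse) and an allocating next()).
private static final class StringValueReaderMutableObjectIterator implements MutableObjectIterator<StringValue> {

    private final BufferedReader reader;

    StringValueReaderMutableObjectIterator(BufferedReader reader) {
        this.reader = reader;
    }

    @Override
    public StringValue next(StringValue reuse) throws IOException {
        String line = reader.readLine();
        if (line == null) {
            return null;
        }
        reuse.setValue(line); // overwrite the reused instance instead of allocating
        return reuse;
    }

    @Override
    public StringValue next() throws IOException {
        String line = reader.readLine();
        return line == null ? null : new StringValue(line);
    }
}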

Aggregations

StringValue (org.apache.flink.types.StringValue) 88
Test (org.junit.Test) 61
IntValue (org.apache.flink.types.IntValue) 35
LongValue (org.apache.flink.types.LongValue) 21
IOException (java.io.IOException) 17
ArrayList (java.util.ArrayList) 15
Record (org.apache.flink.types.Record) 13
TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo) 12
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 11
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 11
DoubleValue (org.apache.flink.types.DoubleValue) 11
Value (org.apache.flink.types.Value) 10
Tuple3 (org.apache.flink.api.java.tuple.Tuple3) 9
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 7
Plan (org.apache.flink.api.common.Plan) 7
Configuration (org.apache.flink.configuration.Configuration) 7
FileInputSplit (org.apache.flink.core.fs.FileInputSplit) 7
NoSuchElementException (java.util.NoSuchElementException) 6
File (java.io.File) 5
JobExecutionResult (org.apache.flink.api.common.JobExecutionResult) 5