
Example 1 with Value

Use of org.apache.flink.types.Value in project flink by apache.

From class CollectionExecutor, method executeBulkIteration:

@SuppressWarnings("unchecked")
private <T> List<T> executeBulkIteration(BulkIterationBase<?> iteration) throws Exception {
    Operator<?> inputOp = iteration.getInput();
    if (inputOp == null) {
        throw new InvalidProgramException("The iteration " + iteration.getName() + " has no input (initial partial solution).");
    }
    if (iteration.getNextPartialSolution() == null) {
        throw new InvalidProgramException("The iteration " + iteration.getName() + " has no next partial solution defined (is not closed).");
    }
    List<T> inputData = (List<T>) execute(inputOp);
    // get the operators that are iterative
    Set<Operator<?>> dynamics = new LinkedHashSet<Operator<?>>();
    DynamicPathCollector dynCollector = new DynamicPathCollector(dynamics);
    iteration.getNextPartialSolution().accept(dynCollector);
    if (iteration.getTerminationCriterion() != null) {
        iteration.getTerminationCriterion().accept(dynCollector);
    }
    // register the aggregators
    for (AggregatorWithName<?> a : iteration.getAggregators().getAllRegisteredAggregators()) {
        aggregators.put(a.getName(), a.getAggregator());
    }
    String convCriterionAggName = iteration.getAggregators().getConvergenceCriterionAggregatorName();
    ConvergenceCriterion<Value> convCriterion = (ConvergenceCriterion<Value>) iteration.getAggregators().getConvergenceCriterion();
    List<T> currentResult = inputData;
    final int maxIterations = iteration.getMaximumNumberOfIterations();
    for (int superstep = 1; superstep <= maxIterations; superstep++) {
        // set the input to the current partial solution
        this.intermediateResults.put(iteration.getPartialSolution(), currentResult);
        // set the superstep number
        iterationSuperstep = superstep;
        // grab the current iteration result
        currentResult = (List<T>) execute(iteration.getNextPartialSolution(), superstep);
        // evaluate the termination criterion
        if (iteration.getTerminationCriterion() != null) {
            execute(iteration.getTerminationCriterion(), superstep);
        }
        // evaluate the aggregator convergence criterion
        if (convCriterion != null && convCriterionAggName != null) {
            Value v = aggregators.get(convCriterionAggName).getAggregate();
            if (convCriterion.isConverged(superstep, v)) {
                break;
            }
        }
        // clear the dynamic results
        for (Operator<?> o : dynamics) {
            intermediateResults.remove(o);
        }
        // set the previous iteration's aggregates and reset the aggregators
        for (Map.Entry<String, Aggregator<?>> e : aggregators.entrySet()) {
            previousAggregates.put(e.getKey(), e.getValue().getAggregate());
            e.getValue().reset();
        }
    }
    previousAggregates.clear();
    aggregators.clear();
    return currentResult;
}
Also used: java.util.LinkedHashSet, org.apache.flink.api.common.aggregators.Aggregator, org.apache.flink.api.common.aggregators.ConvergenceCriterion, org.apache.flink.api.common.InvalidProgramException, org.apache.flink.types.Value, java.util.ArrayList, java.util.List, java.util.HashMap, java.util.Map
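
The convergence check above couples a named aggregator with a ConvergenceCriterion. As a rough illustration (a sketch, not Flink source; ZeroCountConvergence is a made-up name), a criterion that converges once a LongValue aggregate drops to zero could look like this:

// Hypothetical convergence criterion over a LongValue aggregate.
public class ZeroCountConvergence implements ConvergenceCriterion<LongValue> {

    @Override
    public boolean isConverged(int iteration, LongValue value) {
        // stop iterating as soon as the aggregated count reaches zero
        return value.getValue() == 0;
    }
}

In user code, such a criterion is registered together with its aggregator on the iteration (the DataSet API exposes registerAggregationConvergenceCriterion for this), which is how executeBulkIteration later finds both by name.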

Example 2 with Value

Use of org.apache.flink.types.Value in project flink by apache.

From class GenericCsvInputFormatTest, method readWithHeaderLineAndInvalidIntermediate:

@Test
public void readWithHeaderLineAndInvalidIntermediate() {
    try {
        final String fileContent =
                "colname-1|colname-2|some name 3|column four|\n"
                        + "123|abc|456|def|\n"
                        // repeated header in the middle
                        + "colname-1|colname-2|some name 3|column four|\n"
                        + "987|xyz|654|pqr|\n";
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();
        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, StringValue.class, IntValue.class, StringValue.class);
        format.setSkipFirstLineAsHeader(true);
        format.configure(parameters);
        format.open(split);
        Value[] values = new Value[] { new IntValue(), new StringValue(), new IntValue(), new StringValue() };
        // the first line is skipped as the header, so the first
        // returned record is the second line of the file
        assertNotNull(format.nextRecord(values));
        try {
            format.nextRecord(values);
            fail("Format accepted invalid line.");
        } catch (ParseException e) {
            // expected: the repeated header line cannot be parsed
        }
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
    }
}
Also used: org.apache.flink.core.fs.FileInputSplit, org.apache.flink.configuration.Configuration, org.apache.flink.types.IntValue, org.apache.flink.types.DoubleValue, org.apache.flink.types.LongValue, org.apache.flink.types.Value, org.apache.flink.types.StringValue, java.io.IOException, org.junit.Test
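
The Value types the format fills in (IntValue, StringValue, and so on) are Flink's mutable serialization wrappers. A custom Value only needs the read/write pair inherited from IOReadableWritable; a minimal sketch (TemperatureValue is a hypothetical type, not part of Flink):

import java.io.IOException;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.types.Value;

// Hypothetical example type showing the minimal Value contract.
public class TemperatureValue implements Value {

    private double celsius;

    public double getCelsius() {
        return celsius;
    }

    public void setCelsius(double celsius) {
        this.celsius = celsius;
    }

    @Override
    public void write(DataOutputView out) throws IOException {
        out.writeDouble(celsius);
    }

    @Override
    public void read(DataInputView in) throws IOException {
        celsius = in.readDouble();
    }
}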

Example 3 with Value

Use of org.apache.flink.types.Value in project flink by apache.

From class GenericCsvInputFormatTest, method testReadInvalidContentsLenientWithSkipping:

@Test
public void testReadInvalidContentsLenientWithSkipping() {
    try {
        final String fileContent =
                "abc|dfgsdf|777|444\n" // good line
                        + "kkz|777|foobar|hhg\n" // wrong data type in field
                        + "kkz|777foobarhhg  \n" // too short, a skipped field never ends
                        + "xyx|ignored|42|\n"; // another good line
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();
        format.setFieldDelimiter("|");
        // a null type marks a field that is parsed over but not returned
        format.setFieldTypesGeneric(StringValue.class, null, IntValue.class);
        format.setLenient(true);
        format.configure(parameters);
        format.open(split);
        Value[] values = new Value[] { new StringValue(), new IntValue() };
        assertNotNull(format.nextRecord(values));
        assertNull(format.nextRecord(values));
        assertNull(format.nextRecord(values));
        assertNotNull(format.nextRecord(values));
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
    }
}
Also used: org.apache.flink.core.fs.FileInputSplit, org.apache.flink.configuration.Configuration, org.apache.flink.types.IntValue, org.apache.flink.types.DoubleValue, org.apache.flink.types.LongValue, org.apache.flink.types.Value, org.apache.flink.types.StringValue, java.io.IOException, org.junit.Test
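
Outside of a unit test, a delimited input format is normally driven by the standard open/nextRecord/reachedEnd loop. A sketch, reusing the format and split variables from above (GenericCsvInputFormat itself is abstract, so the test presumably uses a thin local subclass):

// Sketch of the standard InputFormat read loop.
Value[] reuse = new Value[] { new StringValue(), new IntValue() };
format.open(split);
try {
    while (!format.reachedEnd()) {
        Value[] record = format.nextRecord(reuse);
        if (record != null) {
            // process the parsed record
        }
        // in lenient mode, a null record means the line was skipped,
        // not that the input is exhausted; reachedEnd() decides that
    }
} finally {
    format.close();
}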

Example 4 with Value

Use of org.apache.flink.types.Value in project flink by apache.

From class GenericCsvInputFormatTest, method testReadNoPosAll:

@Test
public void testReadNoPosAll() throws IOException {
    try {
        final String fileContent = "111|222|333|444|555\n666|777|888|999|000|";
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();
        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
        format.configure(parameters);
        format.open(split);
        Value[] values = createIntValues(5);
        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(111, ((IntValue) values[0]).getValue());
        assertEquals(222, ((IntValue) values[1]).getValue());
        assertEquals(333, ((IntValue) values[2]).getValue());
        assertEquals(444, ((IntValue) values[3]).getValue());
        assertEquals(555, ((IntValue) values[4]).getValue());
        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(666, ((IntValue) values[0]).getValue());
        assertEquals(777, ((IntValue) values[1]).getValue());
        assertEquals(888, ((IntValue) values[2]).getValue());
        assertEquals(999, ((IntValue) values[3]).getValue());
        assertEquals(0, ((IntValue) values[4]).getValue()); // "000" parses to 0
        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
    }
}
Also used: org.apache.flink.core.fs.FileInputSplit, org.apache.flink.configuration.Configuration, org.apache.flink.types.IntValue, org.apache.flink.types.DoubleValue, org.apache.flink.types.LongValue, org.apache.flink.types.Value, org.apache.flink.types.StringValue, java.io.IOException, org.junit.Test
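
createIntValues is a helper of the test class and is not shown on this page; it presumably just allocates an array of fresh IntValue instances, along these lines (a reconstruction, not the verbatim source):

private static Value[] createIntValues(int num) {
    Value[] values = new Value[num];
    for (int i = 0; i < num; i++) {
        values[i] = new IntValue();
    }
    return values;
}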

Example 5 with Value

Use of org.apache.flink.types.Value in project flink by apache.

From class GenericCsvInputFormatTest, method readWithParseQuotedStrings:

@Test
public void readWithParseQuotedStrings() {
    try {
        final String fileContent = "\"ab\\\"c\"|\"def\"\n\"ghijk\"|\"abc\"";
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();
        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, StringValue.class);
        format.enableQuotedStringParsing('"');
        format.configure(parameters);
        format.open(split);
        Value[] values = new Value[] { new StringValue(), new StringValue() };
        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("ab\\\"c", ((StringValue) values[0]).getValue());
        assertEquals("def", ((StringValue) values[1]).getValue());
        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("ghijk", ((StringValue) values[0]).getValue());
        assertEquals("abc", ((StringValue) values[1]).getValue());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
    }
}
Also used: org.apache.flink.core.fs.FileInputSplit, org.apache.flink.configuration.Configuration, org.apache.flink.types.IntValue, org.apache.flink.types.DoubleValue, org.apache.flink.types.LongValue, org.apache.flink.types.Value, org.apache.flink.types.StringValue, java.io.IOException, org.junit.Test
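
Note that the first assertion expects ab\"c with the backslash intact: quoted-string parsing only uses the quote character to locate field boundaries, it does not unescape the content. In the user-facing DataSet API, the equivalent switch is exposed on the CSV reader, roughly like this (the path is a placeholder):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<String, String>> rows = env
        .readCsvFile("file:///path/to/input.csv")
        .fieldDelimiter("|")
        .parseQuotedStrings('"')
        .types(String.class, String.class);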

Aggregations

Value (org.apache.flink.types.Value): 28 usages
Test (org.junit.Test): 21 usages
IntValue (org.apache.flink.types.IntValue): 19 usages
StringValue (org.apache.flink.types.StringValue): 19 usages
IOException (java.io.IOException): 18 usages
LongValue (org.apache.flink.types.LongValue): 18 usages
Configuration (org.apache.flink.configuration.Configuration): 17 usages
FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 17 usages
DoubleValue (org.apache.flink.types.DoubleValue): 17 usages
HashMap (java.util.HashMap): 5 usages
Aggregator (org.apache.flink.api.common.aggregators.Aggregator): 4 usages
ArrayList (java.util.ArrayList): 2 usages
LinkedHashSet (java.util.LinkedHashSet): 2 usages
List (java.util.List): 2 usages
Map (java.util.Map): 2 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 2 usages
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 2 usages
TaskInfo (org.apache.flink.api.common.TaskInfo): 2 usages
ConvergenceCriterion (org.apache.flink.api.common.aggregators.ConvergenceCriterion): 2 usages
RuntimeUDFContext (org.apache.flink.api.common.functions.util.RuntimeUDFContext): 2 usages