Search in sources :

Example 51 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class TriangleListing method main.

public static void main(String[] args) throws Exception {
    // Set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    ParameterTool parameters = ParameterTool.fromArgs(args);
    env.getConfig().setGlobalJobParameters(parameters);
    if (!parameters.has("directed")) {
        throw new ProgramParametrizationException(getUsage("must declare execution mode as '--directed true' or '--directed false'"));
    }
    boolean directedAlgorithm = parameters.getBoolean("directed");
    int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);
    boolean triadic_census = parameters.getBoolean("triadic_census", DEFAULT_TRIADIC_CENSUS);
    GraphAnalytic tc = null;
    DataSet tl;
    switch(parameters.get("input", "")) {
        case "csv":
            {
                String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
                String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
                GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env).ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);
                switch(parameters.get("type", "")) {
                    case "integer":
                        {
                            Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);
                            if (directedAlgorithm) {
                                if (parameters.getBoolean("simplify", false)) {
                                    graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
                                }
                                if (triadic_census) {
                                    tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                                }
                                tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                            } else {
                                if (parameters.getBoolean("simplify", false)) {
                                    graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
                                }
                                if (triadic_census) {
                                    tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                                }
                                tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                            }
                        }
                        break;
                    case "string":
                        {
                            Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);
                            if (directedAlgorithm) {
                                if (parameters.getBoolean("simplify", false)) {
                                    graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>().setParallelism(little_parallelism));
                                }
                                if (triadic_census) {
                                    tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                                }
                                tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                            } else {
                                if (parameters.getBoolean("simplify", false)) {
                                    graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
                                }
                                if (triadic_census) {
                                    tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                                }
                                tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                            }
                        }
                        break;
                    default:
                        throw new ProgramParametrizationException(getUsage("invalid CSV type"));
                }
            }
            break;
        case "rmat":
            {
                int scale = parameters.getInt("scale", DEFAULT_SCALE);
                int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);
                RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();
                long vertexCount = 1L << scale;
                long edgeCount = vertexCount * edgeFactor;
                Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();
                if (directedAlgorithm) {
                    if (scale > 32) {
                        Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
                        if (triadic_census) {
                            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                        }
                        tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                    } else {
                        Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
                        if (triadic_census) {
                            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                        }
                        tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                    }
                } else {
                    boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);
                    if (scale > 32) {
                        Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));
                        if (triadic_census) {
                            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                        }
                        tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                    } else {
                        Graph<IntValue, NullValue, NullValue> simpleGraph = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()).setParallelism(little_parallelism)).run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));
                        if (triadic_census) {
                            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                        }
                        tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
                    }
                }
            }
            break;
        default:
            throw new ProgramParametrizationException(getUsage("invalid input type"));
    }
    switch(parameters.get("output", "")) {
        case "print":
            System.out.println();
            if (directedAlgorithm) {
                for (Object e : tl.collect()) {
                    org.apache.flink.graph.library.clustering.directed.TriangleListing.Result result = (org.apache.flink.graph.library.clustering.directed.TriangleListing.Result) e;
                    System.out.println(result.toPrintableString());
                }
            } else {
                tl.print();
            }
            break;
        case "hash":
            System.out.println();
            System.out.println(DataSetUtils.checksumHashCode(tl));
            break;
        case "csv":
            String filename = parameters.getRequired("output_filename");
            String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
            String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
            tl.writeAsCsv(filename, lineDelimiter, fieldDelimiter);
            env.execute();
            break;
        default:
            throw new ProgramParametrizationException(getUsage("invalid output type"));
    }
    if (tc != null) {
        System.out.print("Triadic census:\n  ");
        System.out.println(tc.getResult().toString().replace(";", "\n "));
    }
    JobExecutionResult result = env.getLastJobExecutionResult();
    NumberFormat nf = NumberFormat.getInstance();
    System.out.println();
    System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
Also used : RandomGenerableFactory(org.apache.flink.graph.generator.random.RandomGenerableFactory) DataSet(org.apache.flink.api.java.DataSet) GraphAnalytic(org.apache.flink.graph.GraphAnalytic) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) NullValue(org.apache.flink.types.NullValue) StringValue(org.apache.flink.types.StringValue) LongValueToUnsignedIntValue(org.apache.flink.graph.asm.translate.translators.LongValueToUnsignedIntValue) IntValue(org.apache.flink.types.IntValue) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) GraphCsvReader(org.apache.flink.graph.GraphCsvReader) RMatGraph(org.apache.flink.graph.generator.RMatGraph) RMatGraph(org.apache.flink.graph.generator.RMatGraph) Graph(org.apache.flink.graph.Graph) LongValue(org.apache.flink.types.LongValue) ParameterTool(org.apache.flink.api.java.utils.ParameterTool) LongValueToUnsignedIntValue(org.apache.flink.graph.asm.translate.translators.LongValueToUnsignedIntValue) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) JDKRandomGeneratorFactory(org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory) TranslateGraphIds(org.apache.flink.graph.asm.translate.TranslateGraphIds) ProgramParametrizationException(org.apache.flink.client.program.ProgramParametrizationException) NumberFormat(java.text.NumberFormat)

Example 52 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class ValueCollectionDataSets method getPojoWithDateAndEnum.

public static DataSet<PojoWithDateAndEnum> getPojoWithDateAndEnum(ExecutionEnvironment env) {
    List<PojoWithDateAndEnum> data = new ArrayList<PojoWithDateAndEnum>();
    PojoWithDateAndEnum one = new PojoWithDateAndEnum();
    one.group = new StringValue("a");
    one.date = new Date(666);
    one.cat = Category.CAT_A;
    data.add(one);
    PojoWithDateAndEnum two = new PojoWithDateAndEnum();
    two.group = new StringValue("a");
    two.date = new Date(666);
    two.cat = Category.CAT_A;
    data.add(two);
    PojoWithDateAndEnum three = new PojoWithDateAndEnum();
    three.group = new StringValue("b");
    three.date = new Date(666);
    three.cat = Category.CAT_B;
    data.add(three);
    return env.fromCollection(data);
}
Also used : ArrayList(java.util.ArrayList) StringValue(org.apache.flink.types.StringValue) Date(java.util.Date)

Example 53 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class ValueCollectionDataSets method getStringDataSet.

public static DataSet<StringValue> getStringDataSet(ExecutionEnvironment env) {
    List<StringValue> data = new ArrayList<>();
    data.add(new StringValue("Hi"));
    data.add(new StringValue("Hello"));
    data.add(new StringValue("Hello world"));
    data.add(new StringValue("Hello world, how are you?"));
    data.add(new StringValue("I am fine."));
    data.add(new StringValue("Luke Skywalker"));
    data.add(new StringValue("Random comment"));
    data.add(new StringValue("LOL"));
    Collections.shuffle(data);
    return env.fromCollection(data);
}
Also used : ArrayList(java.util.ArrayList) StringValue(org.apache.flink.types.StringValue)

Example 54 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class OutputEmitterTest method testBroadcast.

@Test
public void testBroadcast() {
    // Test for IntValue
    @SuppressWarnings({ "unchecked", "rawtypes" }) final TypeComparator<Record> intComp = new RecordComparatorFactory(new int[] { 0 }, new Class[] { IntValue.class }).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> oe1 = new OutputEmitter<Record>(ShipStrategyType.BROADCAST, intComp);
    final SerializationDelegate<Record> delegate = new SerializationDelegate<Record>(new RecordSerializerFactory().getSerializer());
    int numChannels = 100;
    int numRecords = 50000;
    int[] hit = new int[numChannels];
    for (int i = 0; i < numRecords; i++) {
        IntValue k = new IntValue(i);
        Record rec = new Record(k);
        delegate.setInstance(rec);
        int[] chans = oe1.selectChannels(delegate, hit.length);
        for (int chan : chans) {
            hit[chan]++;
        }
    }
    for (int aHit : hit) {
        assertTrue(aHit + "", aHit == numRecords);
    }
    // Test for StringValue
    @SuppressWarnings({ "unchecked", "rawtypes" }) final TypeComparator<Record> stringComp = new RecordComparatorFactory(new int[] { 0 }, new Class[] { StringValue.class }).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> oe2 = new OutputEmitter<Record>(ShipStrategyType.BROADCAST, stringComp);
    numChannels = 100;
    numRecords = 5000;
    hit = new int[numChannels];
    for (int i = 0; i < numRecords; i++) {
        StringValue k = new StringValue(i + "");
        Record rec = new Record(k);
        delegate.setInstance(rec);
        int[] chans = oe2.selectChannels(delegate, hit.length);
        for (int chan : chans) {
            hit[chan]++;
        }
    }
    for (int aHit : hit) {
        assertTrue(aHit + "", aHit == numRecords);
    }
}
Also used : RecordComparatorFactory(org.apache.flink.runtime.testutils.recordutils.RecordComparatorFactory) RecordSerializerFactory(org.apache.flink.runtime.testutils.recordutils.RecordSerializerFactory) SerializationDelegate(org.apache.flink.runtime.plugable.SerializationDelegate) OutputEmitter(org.apache.flink.runtime.operators.shipping.OutputEmitter) Record(org.apache.flink.types.Record) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) Test(org.junit.Test)

Example 55 with StringValue

use of org.apache.flink.types.StringValue in project flink by apache.

the class GenericCsvInputFormatTest method testReadInvalidContents.

@Test
public void testReadInvalidContents() throws IOException {
    try {
        final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
        final FileInputSplit split = createTempFile(fileContent);
        final Configuration parameters = new Configuration();
        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
        format.configure(parameters);
        format.open(split);
        Value[] values = new Value[] { new StringValue(), new IntValue(), new StringValue(), new IntValue() };
        assertNotNull(format.nextRecord(values));
        try {
            format.nextRecord(values);
            fail("Input format accepted on invalid input.");
        } catch (ParseException ignored) {
        }
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IntValue(org.apache.flink.types.IntValue) DoubleValue(org.apache.flink.types.DoubleValue) LongValue(org.apache.flink.types.LongValue) Value(org.apache.flink.types.Value) StringValue(org.apache.flink.types.StringValue) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

StringValue (org.apache.flink.types.StringValue)88 Test (org.junit.Test)61 IntValue (org.apache.flink.types.IntValue)35 LongValue (org.apache.flink.types.LongValue)21 IOException (java.io.IOException)17 ArrayList (java.util.ArrayList)15 Record (org.apache.flink.types.Record)13 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)12 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)11 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)11 DoubleValue (org.apache.flink.types.DoubleValue)11 Value (org.apache.flink.types.Value)10 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)9 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)7 Plan (org.apache.flink.api.common.Plan)7 Configuration (org.apache.flink.configuration.Configuration)7 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)7 NoSuchElementException (java.util.NoSuchElementException)6 File (java.io.File)5 JobExecutionResult (org.apache.flink.api.common.JobExecutionResult)5