Use of org.apache.flink.types.StringValue in project flink by apache.
The class TriangleListing, method main.
public static void main(String[] args) throws Exception {
    // Set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    ParameterTool parameters = ParameterTool.fromArgs(args);
    env.getConfig().setGlobalJobParameters(parameters);

    if (!parameters.has("directed")) {
        throw new ProgramParametrizationException(getUsage(
            "must declare execution mode as '--directed true' or '--directed false'"));
    }
    boolean directedAlgorithm = parameters.getBoolean("directed");

    int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);
    boolean triadic_census = parameters.getBoolean("triadic_census", DEFAULT_TRIADIC_CENSUS);

    GraphAnalytic tc = null;
    DataSet tl;

    switch (parameters.get("input", "")) {
        case "csv": {
            String lineDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
            String fieldDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

            GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env)
                .ignoreCommentsEdges("#")
                .lineDelimiterEdges(lineDelimiter)
                .fieldDelimiterEdges(fieldDelimiter);

            switch (parameters.get("type", "")) {
                case "integer": {
                    Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

                    if (directedAlgorithm) {
                        if (parameters.getBoolean("simplify", false)) {
                            graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>()
                                .setParallelism(little_parallelism));
                        }
                        if (triadic_census) {
                            tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>()
                                .setLittleParallelism(little_parallelism));
                        }
                        tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    } else {
                        if (parameters.getBoolean("simplify", false)) {
                            graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false)
                                .setParallelism(little_parallelism));
                        }
                        if (triadic_census) {
                            tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>()
                                .setLittleParallelism(little_parallelism));
                        }
                        tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                }
                break;

                case "string": {
                    Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

                    if (directedAlgorithm) {
                        if (parameters.getBoolean("simplify", false)) {
                            graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>()
                                .setParallelism(little_parallelism));
                        }
                        if (triadic_census) {
                            tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<StringValue, NullValue, NullValue>()
                                .setLittleParallelism(little_parallelism));
                        }
                        tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<StringValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    } else {
                        if (parameters.getBoolean("simplify", false)) {
                            graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false)
                                .setParallelism(little_parallelism));
                        }
                        if (triadic_census) {
                            tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<StringValue, NullValue, NullValue>()
                                .setLittleParallelism(little_parallelism));
                        }
                        tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<StringValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                }
                break;

                default:
                    throw new ProgramParametrizationException(getUsage("invalid CSV type"));
            }
        }
        break;

        case "rmat": {
            int scale = parameters.getInt("scale", DEFAULT_SCALE);
            int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

            RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

            long vertexCount = 1L << scale;
            long edgeCount = vertexCount * edgeFactor;

            Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount)
                .generate();

            if (directedAlgorithm) {
                if (scale > 32) {
                    Graph<LongValue, NullValue, NullValue> simpleGraph = graph
                        .run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>()
                            .setParallelism(little_parallelism));

                    if (triadic_census) {
                        tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                    tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                } else {
                    // Small scales fit in 32-bit IDs: translate vertex IDs to IntValue
                    // before simplifying, mirroring the undirected branch below
                    Graph<IntValue, NullValue, NullValue> simpleGraph = graph
                        .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
                            .setParallelism(little_parallelism))
                        .run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>()
                            .setParallelism(little_parallelism));

                    if (triadic_census) {
                        tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<IntValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                    tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<IntValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                }
            } else {
                boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

                if (scale > 32) {
                    Graph<LongValue, NullValue, NullValue> simpleGraph = graph
                        .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip)
                            .setParallelism(little_parallelism));

                    if (triadic_census) {
                        tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                    tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                } else {
                    Graph<IntValue, NullValue, NullValue> simpleGraph = graph
                        .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
                            .setParallelism(little_parallelism))
                        .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip)
                            .setParallelism(little_parallelism));

                    if (triadic_census) {
                        tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<IntValue, NullValue, NullValue>()
                            .setLittleParallelism(little_parallelism));
                    }
                    tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<IntValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                }
            }
        }
        break;

        default:
            throw new ProgramParametrizationException(getUsage("invalid input type"));
    }

    switch (parameters.get("output", "")) {
        case "print":
            System.out.println();
            if (directedAlgorithm) {
                for (Object e : tl.collect()) {
                    org.apache.flink.graph.library.clustering.directed.TriangleListing.Result result =
                        (org.apache.flink.graph.library.clustering.directed.TriangleListing.Result) e;
                    System.out.println(result.toPrintableString());
                }
            } else {
                tl.print();
            }
            break;

        case "hash":
            System.out.println();
            System.out.println(DataSetUtils.checksumHashCode(tl));
            break;

        case "csv":
            String filename = parameters.getRequired("output_filename");
            String lineDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
            String fieldDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
            tl.writeAsCsv(filename, lineDelimiter, fieldDelimiter);
            env.execute();
            break;

        default:
            throw new ProgramParametrizationException(getUsage("invalid output type"));
    }

    if (tc != null) {
        System.out.print("Triadic census:\n ");
        System.out.println(tc.getResult().toString().replace(";", "\n "));
    }

    JobExecutionResult result = env.getLastJobExecutionResult();

    NumberFormat nf = NumberFormat.getInstance();
    System.out.println();
    System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
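Stripped of the CLI plumbing in the driver above, the core Gelly call sequence is short. The following is a minimal sketch of the undirected case on a tiny hand-built graph; the class name TriangleListingSketch and the edge list are hypothetical, and it assumes the flink-gelly and flink-java dependencies are on the classpath.

import java.util.Arrays;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.graph.Edge;
import org.apache.flink.graph.Graph;
import org.apache.flink.graph.library.clustering.undirected.TriangleListing;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.NullValue;

public class TriangleListingSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Four vertices, one triangle: (0, 1, 2); the edge (2, 3) is a spur
        Graph<LongValue, NullValue, NullValue> graph = Graph.fromCollection(
            Arrays.asList(
                new Edge<>(new LongValue(0), new LongValue(1), NullValue.getInstance()),
                new Edge<>(new LongValue(1), new LongValue(2), NullValue.getInstance()),
                new Edge<>(new LongValue(0), new LongValue(2), NullValue.getInstance()),
                new Edge<>(new LongValue(2), new LongValue(3), NullValue.getInstance())),
            env);

        // The undirected algorithm expects a simple graph with symmetric edges,
        // so run the undirected Simplify first, as the driver above does
        DataSet<?> triangles = graph
            .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false))
            .run(new TriangleListing<LongValue, NullValue, NullValue>());

        triangles.print();
    }
}

Run as written, this should list the single triangle (0, 1, 2) once.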
Use of org.apache.flink.types.StringValue in project flink by apache.
The class ValueCollectionDataSets, method getPojoWithDateAndEnum.
public static DataSet<PojoWithDateAndEnum> getPojoWithDateAndEnum(ExecutionEnvironment env) {
    List<PojoWithDateAndEnum> data = new ArrayList<PojoWithDateAndEnum>();

    PojoWithDateAndEnum one = new PojoWithDateAndEnum();
    one.group = new StringValue("a");
    one.date = new Date(666);
    one.cat = Category.CAT_A;
    data.add(one);

    PojoWithDateAndEnum two = new PojoWithDateAndEnum();
    two.group = new StringValue("a");
    two.date = new Date(666);
    two.cat = Category.CAT_A;
    data.add(two);

    PojoWithDateAndEnum three = new PojoWithDateAndEnum();
    three.group = new StringValue("b");
    three.date = new Date(666);
    three.cat = Category.CAT_B;
    data.add(three);

    return env.fromCollection(data);
}
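These collection fixtures typically feed grouping and sorting tests. Below is a hypothetical sketch of how this one might be consumed, assuming PojoWithDateAndEnum and its public fields as used above; the class name PojoFixtureSketch and the counting logic are purely illustrative.

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.util.Collector;

public class PojoFixtureSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // "group" is a POJO field expression; StringValue is a valid key type
        ValueCollectionDataSets.getPojoWithDateAndEnum(env)
            .groupBy("group")
            .reduceGroup(new GroupReduceFunction<PojoWithDateAndEnum, String>() {
                @Override
                public void reduce(Iterable<PojoWithDateAndEnum> values, Collector<String> out) {
                    int count = 0;
                    String key = null;
                    for (PojoWithDateAndEnum v : values) {
                        key = v.group.getValue();
                        count++;
                    }
                    // With the fixture above this should emit "a -> 2" and "b -> 1"
                    out.collect(key + " -> " + count);
                }
            })
            .print();
    }
}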
Use of org.apache.flink.types.StringValue in project flink by apache.
The class ValueCollectionDataSets, method getStringDataSet.
public static DataSet<StringValue> getStringDataSet(ExecutionEnvironment env) {
    List<StringValue> data = new ArrayList<>();
    data.add(new StringValue("Hi"));
    data.add(new StringValue("Hello"));
    data.add(new StringValue("Hello world"));
    data.add(new StringValue("Hello world, how are you?"));
    data.add(new StringValue("I am fine."));
    data.add(new StringValue("Luke Skywalker"));
    data.add(new StringValue("Random comment"));
    data.add(new StringValue("LOL"));

    Collections.shuffle(data);

    return env.fromCollection(data);
}
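StringValue is a mutable wrapper around character data, which is why these test sets use it instead of plain String: with enableObjectReuse() a function can update a value in place rather than allocating a new object per record. A minimal, hypothetical sketch using the fixture above (the class name StringValueSketch is illustrative):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.types.StringValue;

public class StringValueSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().enableObjectReuse();

        ValueCollectionDataSets.getStringDataSet(env)
            .map(new MapFunction<StringValue, StringValue>() {
                @Override
                public StringValue map(StringValue value) {
                    // setValue mutates in place; no new StringValue is allocated
                    value.setValue(value.getValue().toUpperCase());
                    return value;
                }
            })
            .print();
    }
}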
Use of org.apache.flink.types.StringValue in project flink by apache.
The class OutputEmitterTest, method testBroadcast.
@Test
public void testBroadcast() {
    // Test for IntValue
    @SuppressWarnings({ "unchecked", "rawtypes" })
    final TypeComparator<Record> intComp =
        new RecordComparatorFactory(new int[] { 0 }, new Class[] { IntValue.class }).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> oe1 =
        new OutputEmitter<Record>(ShipStrategyType.BROADCAST, intComp);
    final SerializationDelegate<Record> delegate =
        new SerializationDelegate<Record>(new RecordSerializerFactory().getSerializer());

    int numChannels = 100;
    int numRecords = 50000;

    int[] hit = new int[numChannels];

    for (int i = 0; i < numRecords; i++) {
        IntValue k = new IntValue(i);
        Record rec = new Record(k);
        delegate.setInstance(rec);

        int[] chans = oe1.selectChannels(delegate, hit.length);
        for (int chan : chans) {
            hit[chan]++;
        }
    }

    for (int aHit : hit) {
        assertTrue(aHit + "", aHit == numRecords);
    }

    // Test for StringValue
    @SuppressWarnings({ "unchecked", "rawtypes" })
    final TypeComparator<Record> stringComp =
        new RecordComparatorFactory(new int[] { 0 }, new Class[] { StringValue.class }).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> oe2 =
        new OutputEmitter<Record>(ShipStrategyType.BROADCAST, stringComp);

    numChannels = 100;
    numRecords = 5000;

    hit = new int[numChannels];

    for (int i = 0; i < numRecords; i++) {
        StringValue k = new StringValue(i + "");
        Record rec = new Record(k);
        delegate.setInstance(rec);

        int[] chans = oe2.selectChannels(delegate, hit.length);
        for (int chan : chans) {
            hit[chan]++;
        }
    }

    for (int aHit : hit) {
        assertTrue(aHit + "", aHit == numRecords);
    }
}
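For contrast with broadcast, a hash-partitioning emitter should route each record to exactly one channel. A hypothetical companion test built from the same helpers as testBroadcast above; PARTITION_HASH is an existing ShipStrategyType, but the method itself and its assertions are a sketch, not part of the Flink test class.

@Test
public void testPartitionHash() {
    @SuppressWarnings({ "unchecked", "rawtypes" })
    final TypeComparator<Record> intComp =
        new RecordComparatorFactory(new int[] { 0 }, new Class[] { IntValue.class }).createComparator();
    final ChannelSelector<SerializationDelegate<Record>> selector =
        new OutputEmitter<Record>(ShipStrategyType.PARTITION_HASH, intComp);
    final SerializationDelegate<Record> delegate =
        new SerializationDelegate<Record>(new RecordSerializerFactory().getSerializer());

    int numChannels = 100;
    int numRecords = 50000;

    for (int i = 0; i < numRecords; i++) {
        delegate.setInstance(new Record(new IntValue(i)));

        // PARTITION_HASH picks exactly one channel per record,
        // whereas BROADCAST returns every channel
        int[] chans = selector.selectChannels(delegate, numChannels);
        assertTrue(chans.length == 1);
        assertTrue(chans[0] >= 0 && chans[0] < numChannels);
    }
}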
Use of org.apache.flink.types.StringValue in project flink by apache.
The class GenericCsvInputFormatTest, method testReadInvalidContents.
@Test
public void testReadInvalidContents() throws IOException {
    try {
        final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new StringValue(), new IntValue(), new StringValue(), new IntValue() };

        assertNotNull(format.nextRecord(values));
        try {
            format.nextRecord(values);
            fail("Input format accepted on invalid input.");
        } catch (ParseException ignored) {
        }
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getSimpleName() + ": " + ex.getMessage());
    }
}
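GenericCsvInputFormat also supports lenient parsing via setLenient(true), where a malformed line is skipped rather than raising ParseException. A hypothetical companion sketch of the same scenario; the assertion that the skipped line surfaces as a null record is an assumption about the lenient behavior, not shown in the snippet above.

@Test
public void testReadInvalidContentsLenient() throws IOException {
    final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
    final FileInputSplit split = createTempFile(fileContent);

    format.setFieldDelimiter("|");
    format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
    format.setLenient(true);
    format.configure(new Configuration());
    format.open(split);

    Value[] values = new Value[] { new StringValue(), new IntValue(), new StringValue(), new IntValue() };

    // First line parses; the malformed second line is skipped instead of throwing
    assertNotNull(format.nextRecord(values));
    assertNull(format.nextRecord(values));
}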