use of org.apache.flink.types.IntValue in project flink by apache.
the class NonReusingKeyGroupedIteratorTest method testMixedProgress.
@Test
public void testMixedProgress() throws Exception {
try {
// Progression only via nextKey() and hasNext() - Key 1, Value A
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
// Progression only through nextKey() - Key 2, Value B
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
// Progression first though haNext() and next(), then through hasNext() - Key 3, Values C, D
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(3))));
Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 3, this.psi.getCurrent().getField(0, IntValue.class).getValue());
Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("C"), this.psi.getValues().next().getField(1, StringValue.class));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(3))));
Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 3, this.psi.getCurrent().getField(0, IntValue.class).getValue());
// Progression first via next() only, then hasNext() only Key 4, Values E, F, G
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("E"), this.psi.getValues().next().getField(1, StringValue.class));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
// Key 5, Values H, I, J, K, L
Assert.assertTrue("KeyGroupedIterator must have another key.", this.psi.nextKey());
Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("H"), this.psi.getValues().next().getField(1, StringValue.class));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
Assert.assertTrue("KeyGroupedIterator returned a wrong key.", this.psi.getComparatorWithCurrentReference().equalToReference(new Record(new IntValue(5))));
Assert.assertEquals("KeyGroupedIterator returned a wrong key.", 5, this.psi.getCurrent().getField(0, IntValue.class).getValue());
Assert.assertEquals("KeyGroupedIterator returned a wrong value.", new StringValue("I"), this.psi.getValues().next().getField(1, StringValue.class));
Assert.assertTrue("KeyGroupedIterator must have another value.", this.psi.getValues().hasNext());
// end
Assert.assertFalse("KeyGroupedIterator must not have another key.", this.psi.nextKey());
Assert.assertFalse("KeyGroupedIterator must not have another key.", this.psi.nextKey());
} catch (Exception e) {
e.printStackTrace();
Assert.fail("The test encountered an unexpected exception.");
}
}
use of org.apache.flink.types.IntValue in project flink by apache.
the class ClusteringCoefficient method main.
public static void main(String[] args) throws Exception {
// Set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableObjectReuse();
ParameterTool parameters = ParameterTool.fromArgs(args);
env.getConfig().setGlobalJobParameters(parameters);
if (!parameters.has("directed")) {
throw new ProgramParametrizationException(getUsage("must declare execution mode as '--directed true' or '--directed false'"));
}
boolean directedAlgorithm = parameters.getBoolean("directed");
int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);
// global and local clustering coefficient results
GraphAnalytic gcc;
GraphAnalytic acc;
DataSet lcc;
switch(parameters.get("input", "")) {
case "csv":
{
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
GraphCsvReader reader = Graph.fromCsvReader(parameters.get("input_filename"), env).ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);
switch(parameters.get("type", "")) {
case "integer":
{
Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);
if (directedAlgorithm) {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
}
gcc = graph.run(new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
acc = graph.run(new org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
lcc = graph.run(new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
} else {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
}
gcc = graph.run(new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
acc = graph.run(new org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
lcc = graph.run(new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
}
break;
case "string":
{
Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);
if (directedAlgorithm) {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>().setParallelism(little_parallelism));
}
gcc = graph.run(new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
acc = graph.run(new org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
lcc = graph.run(new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
} else {
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
}
gcc = graph.run(new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
acc = graph.run(new org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
lcc = graph.run(new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid CSV type"));
}
}
break;
case "rmat":
{
int scale = parameters.getInt("scale", DEFAULT_SCALE);
int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);
RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();
long vertexCount = 1L << scale;
long edgeCount = vertexCount * edgeFactor;
Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).setParallelism(little_parallelism).generate();
if (directedAlgorithm) {
if (scale > 32) {
Graph<LongValue, NullValue, NullValue> newGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
gcc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
acc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
lcc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<LongValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false).setLittleParallelism(little_parallelism));
} else {
Graph<IntValue, NullValue, NullValue> newGraph = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()).setParallelism(little_parallelism)).run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>().setParallelism(little_parallelism));
gcc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
acc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
lcc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<IntValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false).setLittleParallelism(little_parallelism));
}
} else {
boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);
if (scale > 32) {
Graph<LongValue, NullValue, NullValue> newGraph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));
gcc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
acc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
lcc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<LongValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false).setLittleParallelism(little_parallelism));
} else {
Graph<IntValue, NullValue, NullValue> newGraph = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()).setParallelism(little_parallelism)).run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));
gcc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
acc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
lcc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<IntValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false).setLittleParallelism(little_parallelism));
}
}
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid input type"));
}
switch(parameters.get("output", "")) {
case "print":
if (directedAlgorithm) {
for (Object e : lcc.collect()) {
org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient.Result result = (org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient.Result) e;
System.out.println(result.toPrintableString());
}
} else {
for (Object e : lcc.collect()) {
org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient.Result result = (org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient.Result) e;
System.out.println(result.toPrintableString());
}
}
break;
case "hash":
System.out.println(DataSetUtils.checksumHashCode(lcc));
break;
case "csv":
String filename = parameters.get("output_filename");
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
lcc.writeAsCsv(filename, lineDelimiter, fieldDelimiter);
env.execute("Clustering Coefficient");
break;
default:
throw new ProgramParametrizationException(getUsage("invalid output type"));
}
System.out.println(gcc.getResult());
System.out.println(acc.getResult());
JobExecutionResult result = env.getLastJobExecutionResult();
NumberFormat nf = NumberFormat.getInstance();
System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
use of org.apache.flink.types.IntValue in project flink by apache.
the class HITS method main.
public static void main(String[] args) throws Exception {
// Set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableObjectReuse();
ParameterTool parameters = ParameterTool.fromArgs(args);
env.getConfig().setGlobalJobParameters(parameters);
int iterations = parameters.getInt("iterations", DEFAULT_ITERATIONS);
DataSet hits;
switch(parameters.get("input", "")) {
case "csv":
{
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env).ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);
switch(parameters.get("type", "")) {
case "integer":
{
hits = reader.keyType(LongValue.class).run(new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
}
break;
case "string":
{
hits = reader.keyType(StringValue.class).run(new org.apache.flink.graph.library.link_analysis.HITS<StringValue, NullValue, NullValue>(iterations));
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid CSV type"));
}
}
break;
case "rmat":
{
int scale = parameters.getInt("scale", DEFAULT_SCALE);
int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);
RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();
long vertexCount = 1L << scale;
long edgeCount = vertexCount * edgeFactor;
Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();
if (scale > 32) {
hits = graph.run(new Simplify<LongValue, NullValue, NullValue>()).run(new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
} else {
hits = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())).run(new Simplify<IntValue, NullValue, NullValue>()).run(new org.apache.flink.graph.library.link_analysis.HITS<IntValue, NullValue, NullValue>(iterations));
}
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid input type"));
}
switch(parameters.get("output", "")) {
case "print":
System.out.println();
for (Object e : hits.collect()) {
System.out.println(((Result) e).toPrintableString());
}
break;
case "hash":
System.out.println();
System.out.println(DataSetUtils.checksumHashCode(hits));
break;
case "csv":
String filename = parameters.getRequired("output_filename");
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
hits.writeAsCsv(filename, lineDelimiter, fieldDelimiter);
env.execute("HITS");
break;
default:
throw new ProgramParametrizationException(getUsage("invalid output type"));
}
JobExecutionResult result = env.getLastJobExecutionResult();
NumberFormat nf = NumberFormat.getInstance();
System.out.println();
System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
use of org.apache.flink.types.IntValue in project flink by apache.
the class JaccardIndex method main.
public static void main(String[] args) throws Exception {
// Set up the execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().enableObjectReuse();
ParameterTool parameters = ParameterTool.fromArgs(args);
env.getConfig().setGlobalJobParameters(parameters);
int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);
DataSet ji;
switch(parameters.get("input", "")) {
case "csv":
{
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env).ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);
switch(parameters.get("type", "")) {
case "integer":
{
Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
}
ji = graph.run(new org.apache.flink.graph.library.similarity.JaccardIndex<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
break;
case "string":
{
Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);
if (parameters.getBoolean("simplify", false)) {
graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
}
ji = graph.run(new org.apache.flink.graph.library.similarity.JaccardIndex<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid CSV type"));
}
}
break;
case "rmat":
{
int scale = parameters.getInt("scale", DEFAULT_SCALE);
int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);
RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();
long vertexCount = 1L << scale;
long edgeCount = vertexCount * edgeFactor;
Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).setParallelism(little_parallelism).generate();
boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);
if (scale > 32) {
ji = graph.run(new Simplify<LongValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism)).run(new org.apache.flink.graph.library.similarity.JaccardIndex<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
} else {
ji = graph.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()).setParallelism(little_parallelism)).run(new Simplify<IntValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism)).run(new org.apache.flink.graph.library.similarity.JaccardIndex<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
}
}
break;
default:
throw new ProgramParametrizationException(getUsage("invalid input type"));
}
switch(parameters.get("output", "")) {
case "print":
System.out.println();
for (Object e : ji.collect()) {
Result result = (Result) e;
System.out.println(result.toPrintableString());
}
break;
case "hash":
System.out.println();
System.out.println(DataSetUtils.checksumHashCode(ji));
break;
case "csv":
String filename = parameters.getRequired("output_filename");
String lineDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
String fieldDelimiter = StringEscapeUtils.unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));
ji.writeAsCsv(filename, lineDelimiter, fieldDelimiter);
env.execute("Jaccard Index");
break;
default:
throw new ProgramParametrizationException(getUsage("invalid output type"));
}
JobExecutionResult result = env.getLastJobExecutionResult();
NumberFormat nf = NumberFormat.getInstance();
System.out.println();
System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
use of org.apache.flink.types.IntValue in project flink by apache.
the class LongValueToSignedIntValueTest method testTranslation.
@Test
public void testTranslation() throws Exception {
assertEquals(new IntValue(Integer.MIN_VALUE), translator.translate(new LongValue((long) Integer.MIN_VALUE), reuse));
assertEquals(new IntValue(0), translator.translate(new LongValue(0L), reuse));
assertEquals(new IntValue(Integer.MAX_VALUE), translator.translate(new LongValue((long) Integer.MAX_VALUE), reuse));
}
Aggregations