use of java.text.NumberFormat in project hadoop by apache.
the class WeightSelector method configureOperations.
/**
 * Builds the table of selectable operations from the given configuration.
 *
 * For each configured operation type this determines how many operations the
 * type initially receives, logs that amount together with the type's ratio,
 * and, when the factory supplies an operation for the type, registers it
 * wrapped with an observer that decrements the type's remaining amount every
 * time the operation finishes. Operations that were not handed to any type
 * are reported in the log afterwards.
 *
 * @param cfg
 *          ConfigExtractor providing the total operation count and the
 *          per-type operation data
 */
private void configureOperations(ConfigExtractor cfg) {
  operations = new TreeMap<OperationType, OperationInfo>();
  Map<OperationType, OperationData> configured = cfg.getOperations();
  final int total = cfg.getOpCount();
  int unassigned = total;
  NumberFormat percentFormat = Formatter.getPercentFormatter();
  for (Map.Entry<OperationType, OperationData> entry : configured.entrySet()) {
    // final because the observer created below captures it
    final OperationType type = entry.getKey();
    OperationData data = entry.getValue();
    OperationInfo info = new OperationInfo();
    info.distribution = data.getDistribution();
    int initial = determineHowMany(total, data, type);
    // the share is deducted even if no operation ends up being registered
    unassigned -= initial;
    LOG.info(type.name() + " has " + initial + " initial operations out of "
        + total + " for its ratio " + percentFormat.format(data.getPercent()));
    info.amountLeft = initial;
    Operation delegate = factory.getOperation(type);
    if (delegate == null) {
      // the factory has nothing for this type, so it is not registered
      continue;
    }
    Observer countdown = new Observer() {
      public void notifyFinished(Operation finished) {
        // one fewer operation of this type remains once it is done
        OperationInfo tracked = operations.get(type);
        if (tracked != null) {
          tracked.amountLeft--;
        }
      }

      public void notifyStarting(Operation starting) {
        // nothing is tracked when an operation starts
      }
    };
    info.operation = new ObserveableOp(delegate, countdown);
    operations.put(type, info);
  }
  if (unassigned > 0) {
    LOG.info(unassigned + " left over operations found (due to inability to support partial operations)");
  }
}
use of java.text.NumberFormat in project hadoop by apache.
the class ConfigExtractor method dumpOptions.
/**
 * Writes the settings held by the given config extractor to the log,
 * followed by each configured operation with its distribution and ratio.
 * Returns without logging anything when the extractor is null.
 *
 * @param cfg
 *          the config whose options are written to the log
 */
static void dumpOptions(ConfigExtractor cfg) {
  if (cfg == null) {
    return;
  }

  // locations and general run settings
  LOG.info("Base directory = " + cfg.getBaseDirectory());
  LOG.info("Data directory = " + cfg.getDataPath());
  LOG.info("Output directory = " + cfg.getOutputPath());
  LOG.info("Result file = " + cfg.getResultFile());
  LOG.info("Grid queue = " + cfg.getQueueName());
  LOG.info("Should exit on first error = " + cfg.shouldExitOnFirstError());

  // a duration equal to Integer.MAX_VALUE is reported as unlimited
  String durationText = (cfg.getDurationMilliseconds() == Integer.MAX_VALUE)
      ? "unlimited"
      : cfg.getDurationMilliseconds() + " milliseconds";
  LOG.info("Duration = " + durationText);

  LOG.info("Map amount = " + cfg.getMapAmount());
  LOG.info("Reducer amount = " + cfg.getReducerAmount());
  LOG.info("Operation amount = " + cfg.getOpCount());
  LOG.info("Total file limit = " + cfg.getTotalFiles());
  LOG.info("Total dir file limit = " + cfg.getDirSize());

  // read / write / append sizes are either symbolic or an explicit size
  String readText = cfg.shouldReadFullFile()
      ? "entire file"
      : cfg.getReadSize() + " bytes";
  LOG.info("Read size = " + readText);

  String writeText = cfg.shouldWriteUseBlockSize()
      ? "blocksize"
      : cfg.getWriteSize() + " bytes";
  LOG.info("Write size = " + writeText);

  String appendText = cfg.shouldAppendUseBlockSize()
      ? "blocksize"
      : cfg.getAppendSize() + " bytes";
  LOG.info("Append size = " + appendText);

  LOG.info("Block size = " + (cfg.getBlockSize() + " bytes"));

  // optional settings are only logged when present
  if (cfg.getRandomSeed() != null) {
    LOG.info("Random seed = " + cfg.getRandomSeed());
  }
  if (cfg.getSleepRange() != null) {
    LOG.info("Sleep range = " + cfg.getSleepRange() + " milliseconds");
  }
  LOG.info("Replication amount = " + cfg.getReplication());

  // per-operation listing: name, distribution, then ratio (or ??? if unset)
  LOG.info("Operations are:");
  NumberFormat ratioFormat = Formatter.getPercentFormatter();
  Map<OperationType, OperationData> configured = cfg.getOperations();
  for (Map.Entry<OperationType, OperationData> entry : configured.entrySet()) {
    LOG.info(entry.getKey().name());
    OperationData data = entry.getValue();
    LOG.info(" " + data.getDistribution().name());
    if (data.getPercent() == null) {
      LOG.info(" ???");
    } else {
      LOG.info(" " + ratioFormat.format(data.getPercent()));
    }
  }
}
use of java.text.NumberFormat in project flink by apache.
the class ClusteringCoefficient method main.
/**
 * Computes the global, average, and local clustering coefficients of a graph
 * and prints, hashes, or writes the local results, followed by the global and
 * average results and the job's net runtime.
 *
 * Parameters read by this method:
 * <ul>
 * <li>{@code --directed} (required): selects the directed or undirected
 * algorithm variants.</li>
 * <li>{@code --little_parallelism}: parallelism passed to the graph
 * algorithms; defaults to {@code PARALLELISM_DEFAULT}.</li>
 * <li>{@code --input csv}: requires {@code --input_filename}; optional
 * {@code --input_line_delimiter}, {@code --input_field_delimiter},
 * {@code --simplify}; {@code --type} must be {@code integer} or
 * {@code string}.</li>
 * <li>{@code --input rmat}: optional {@code --scale}, {@code --edge_factor},
 * and (undirected only) {@code --clip_and_flip}.</li>
 * <li>{@code --output print|hash|csv}: {@code csv} requires
 * {@code --output_filename}; optional {@code --output_line_delimiter},
 * {@code --output_field_delimiter}.</li>
 * </ul>
 *
 * @param args
 *          command-line arguments, parsed with {@code ParameterTool}
 * @throws Exception
 *           a {@code ProgramParametrizationException} when a mode selection
 *           is missing or invalid; otherwise whatever the parameter lookups
 *           or the Flink job propagate
 */
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);
  env.getConfig().setGlobalJobParameters(parameters);

  if (!parameters.has("directed")) {
    throw new ProgramParametrizationException(getUsage("must declare execution mode as '--directed true' or '--directed false'"));
  }
  boolean directedAlgorithm = parameters.getBoolean("directed");

  int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);

  // global and local clustering coefficient results
  GraphAnalytic gcc;
  GraphAnalytic acc;
  DataSet lcc;

  switch (parameters.get("input", "")) {
    case "csv": {
      String lineDelimiter = StringEscapeUtils.unescapeJava(
          parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils.unescapeJava(
          parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      // getRequired so that a missing filename is reported by ParameterTool
      // instead of a null path being handed to the CSV reader
      GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env)
          .ignoreCommentsEdges("#")
          .lineDelimiterEdges(lineDelimiter)
          .fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

          if (directedAlgorithm) {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(
                  new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>()
                      .setParallelism(little_parallelism));
            }

            gcc = graph.run(
                new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
            acc = graph.run(
                new org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient<LongValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
            lcc = graph.run(
                new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<LongValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
          } else {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(
                  new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false)
                      .setParallelism(little_parallelism));
            }

            gcc = graph.run(
                new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
            acc = graph.run(
                new org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient<LongValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
            lcc = graph.run(
                new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<LongValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
          }
        }
        break;

        case "string": {
          Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

          if (directedAlgorithm) {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(
                  new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>()
                      .setParallelism(little_parallelism));
            }

            gcc = graph.run(
                new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<StringValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
            acc = graph.run(
                new org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient<StringValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
            lcc = graph.run(
                new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<StringValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
          } else {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(
                  new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false)
                      .setParallelism(little_parallelism));
            }

            gcc = graph.run(
                new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<StringValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
            acc = graph.run(
                new org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient<StringValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
            lcc = graph.run(
                new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<StringValue, NullValue, NullValue>()
                    .setLittleParallelism(little_parallelism));
          }
        }
        break;

        default:
          throw new ProgramParametrizationException(getUsage("invalid CSV type"));
      }
    }
    break;

    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount)
          .setParallelism(little_parallelism)
          .generate();

      // For scale <= 32 the ids are translated to IntValue before running the
      // algorithms; presumably ids below 2^32 fit an unsigned int - TODO confirm
      // against RMatGraph's id range.
      if (directedAlgorithm) {
        if (scale > 32) {
          Graph<LongValue, NullValue, NullValue> newGraph = graph.run(
              new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>()
                  .setParallelism(little_parallelism));

          gcc = newGraph.run(
              new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>()
                  .setLittleParallelism(little_parallelism));
          acc = newGraph.run(
              new org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient<LongValue, NullValue, NullValue>()
                  .setLittleParallelism(little_parallelism));
          lcc = newGraph.run(
              new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<LongValue, NullValue, NullValue>()
                  .setIncludeZeroDegreeVertices(false)
                  .setLittleParallelism(little_parallelism));
        } else {
          Graph<IntValue, NullValue, NullValue> newGraph = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
                  .setParallelism(little_parallelism))
              .run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>()
                  .setParallelism(little_parallelism));

          gcc = newGraph.run(
              new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<IntValue, NullValue, NullValue>()
                  .setLittleParallelism(little_parallelism));
          acc = newGraph.run(
              new org.apache.flink.graph.library.clustering.directed.AverageClusteringCoefficient<IntValue, NullValue, NullValue>()
                  .setLittleParallelism(little_parallelism));
          lcc = newGraph.run(
              new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<IntValue, NullValue, NullValue>()
                  .setIncludeZeroDegreeVertices(false)
                  .setLittleParallelism(little_parallelism));
        }
      } else {
        boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

        if (scale > 32) {
          Graph<LongValue, NullValue, NullValue> newGraph = graph.run(
              new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip)
                  .setParallelism(little_parallelism));

          gcc = newGraph.run(
              new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>()
                  .setLittleParallelism(little_parallelism));
          acc = newGraph.run(
              new org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient<LongValue, NullValue, NullValue>()
                  .setLittleParallelism(little_parallelism));
          lcc = newGraph.run(
              new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<LongValue, NullValue, NullValue>()
                  .setIncludeZeroDegreeVertices(false)
                  .setLittleParallelism(little_parallelism));
        } else {
          Graph<IntValue, NullValue, NullValue> newGraph = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
                  .setParallelism(little_parallelism))
              .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip)
                  .setParallelism(little_parallelism));

          gcc = newGraph.run(
              new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<IntValue, NullValue, NullValue>()
                  .setLittleParallelism(little_parallelism));
          acc = newGraph.run(
              new org.apache.flink.graph.library.clustering.undirected.AverageClusteringCoefficient<IntValue, NullValue, NullValue>()
                  .setLittleParallelism(little_parallelism));
          lcc = newGraph.run(
              new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<IntValue, NullValue, NullValue>()
                  .setIncludeZeroDegreeVertices(false)
                  .setLittleParallelism(little_parallelism));
        }
      }
    }
    break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid input type"));
  }

  switch (parameters.get("output", "")) {
    case "print":
      // lcc is a raw DataSet, so each element is cast to the Result type of
      // the variant that produced it
      if (directedAlgorithm) {
        for (Object e : lcc.collect()) {
          org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient.Result result =
              (org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient.Result) e;
          System.out.println(result.toPrintableString());
        }
      } else {
        for (Object e : lcc.collect()) {
          org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient.Result result =
              (org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient.Result) e;
          System.out.println(result.toPrintableString());
        }
      }
      break;

    case "hash":
      System.out.println(DataSetUtils.checksumHashCode(lcc));
      break;

    case "csv":
      // getRequired so that a missing filename is reported by ParameterTool
      // instead of a null path being handed to the CSV writer
      String filename = parameters.getRequired("output_filename");

      String lineDelimiter = StringEscapeUtils.unescapeJava(
          parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils.unescapeJava(
          parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      lcc.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      env.execute("Clustering Coefficient");
      break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid output type"));
  }

  System.out.println(gcc.getResult());
  System.out.println(acc.getResult());

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
use of java.text.NumberFormat in project flink by apache.
the class Graph500 method main.
/**
 * Generates an RMat graph and prints, hashes, or writes its edges, then
 * prints the job's net runtime.
 *
 * Parameters read by this method: {@code --directed} and {@code --simplify}
 * (both required), {@code --scale}, {@code --edge_factor},
 * {@code --clip_and_flip}, and {@code --output print|hash|csv}, where
 * {@code csv} requires {@code --output_filename} and accepts
 * {@code --output_line_delimiter} / {@code --output_field_delimiter}.
 *
 * @param args
 *          command-line arguments, parsed with {@code ParameterTool}
 * @throws Exception
 *           a {@code ProgramParametrizationException} when a required mode
 *           flag is missing or the output type is invalid; otherwise whatever
 *           the parameter lookups or the Flink job propagate
 */
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool params = ParameterTool.fromArgs(args);
  env.getConfig().setGlobalJobParameters(params);

  // both mode flags must be stated explicitly
  if (!params.has("directed")) {
    throw new ProgramParametrizationException(getUsage("must declare execution mode as '--directed true' or '--directed false'"));
  }
  final boolean isDirected = params.getBoolean("directed");

  if (!params.has("simplify")) {
    throw new ProgramParametrizationException(getUsage("must declare '--simplify true' or '--simplify false'"));
  }
  final boolean doSimplify = params.getBoolean("simplify");

  // Generate RMat graph
  final int scale = params.getInt("scale", DEFAULT_SCALE);
  final int edgeFactor = params.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

  RandomGenerableFactory<JDKRandomGenerator> randomFactory = new JDKRandomGeneratorFactory();

  final long vertexCount = 1L << scale;
  final long edgeCount = vertexCount * edgeFactor;

  final boolean clipAndFlip = params.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

  Graph<LongValue, NullValue, NullValue> graph =
      new RMatGraph<>(env, randomFactory, vertexCount, edgeCount).generate();

  if (doSimplify) {
    if (isDirected) {
      graph = graph.run(
          new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>());
    } else {
      graph = graph.run(
          new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip));
    }
  } else if (!isDirected) {
    // undirected without simplification
    graph = graph.getUndirected();
  }

  DataSet<Tuple2<LongValue, LongValue>> edges = graph.getEdges().project(0, 1);

  // Print, hash, or write RMat graph to disk
  final String outputMode = params.get("output", "");
  switch (outputMode) {
    case "print": {
      System.out.println();
      edges.print();
      break;
    }
    case "hash": {
      System.out.println();
      System.out.println(DataSetUtils.checksumHashCode(edges));
      break;
    }
    case "csv": {
      String outputPath = params.getRequired("output_filename");
      String rowSeparator = StringEscapeUtils.unescapeJava(
          params.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String columnSeparator = StringEscapeUtils.unescapeJava(
          params.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      edges.writeAsCsv(outputPath, rowSeparator, columnSeparator);
      env.execute("Graph500");
      break;
    }
    default:
      throw new ProgramParametrizationException(getUsage("invalid output type"));
  }

  // report the net runtime of the last executed job
  JobExecutionResult lastJob = env.getLastJobExecutionResult();
  NumberFormat runtimeFormat = NumberFormat.getInstance();
  System.out.println();
  System.out.println("Execution runtime: " + runtimeFormat.format(lastJob.getNetRuntime()) + " ms");
}
use of java.text.NumberFormat in project flink by apache.
the class HITS method main.
/**
 * Runs the HITS algorithm on a graph read from CSV or generated as an RMat
 * graph, then prints, hashes, or writes the result and prints the job's net
 * runtime.
 *
 * Parameters read by this method: {@code --iterations},
 * {@code --input csv|rmat} (csv requires {@code --input_filename} and a
 * {@code --type} of {@code integer} or {@code string}; rmat accepts
 * {@code --scale} and {@code --edge_factor}), and
 * {@code --output print|hash|csv} (csv requires {@code --output_filename}).
 *
 * @param args
 *          command-line arguments, parsed with {@code ParameterTool}
 * @throws Exception
 *           a {@code ProgramParametrizationException} when the input, CSV
 *           type, or output selection is invalid; otherwise whatever the
 *           parameter lookups or the Flink job propagate
 */
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool params = ParameterTool.fromArgs(args);
  env.getConfig().setGlobalJobParameters(params);

  final int iterations = params.getInt("iterations", DEFAULT_ITERATIONS);

  // raw type: the element type depends on the key type chosen below
  DataSet hits;

  final String inputMode = params.get("input", "");
  switch (inputMode) {
    case "csv": {
      String rowSeparator = StringEscapeUtils.unescapeJava(
          params.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String columnSeparator = StringEscapeUtils.unescapeJava(
          params.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(params.getRequired("input_filename"), env)
          .ignoreCommentsEdges("#")
          .lineDelimiterEdges(rowSeparator)
          .fieldDelimiterEdges(columnSeparator);

      final String keyKind = params.get("type", "");
      switch (keyKind) {
        case "integer": {
          Graph<LongValue, NullValue, NullValue> longGraph = reader.keyType(LongValue.class);
          hits = longGraph.run(
              new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
          break;
        }
        case "string": {
          Graph<StringValue, NullValue, NullValue> stringGraph = reader.keyType(StringValue.class);
          hits = stringGraph.run(
              new org.apache.flink.graph.library.link_analysis.HITS<StringValue, NullValue, NullValue>(iterations));
          break;
        }
        default:
          throw new ProgramParametrizationException(getUsage("invalid CSV type"));
      }
      break;
    }
    case "rmat": {
      final int scale = params.getInt("scale", DEFAULT_SCALE);
      final int edgeFactor = params.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> randomFactory = new JDKRandomGeneratorFactory();

      final long vertexCount = 1L << scale;
      final long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> generated =
          new RMatGraph<>(env, randomFactory, vertexCount, edgeCount).generate();

      if (scale > 32) {
        // keep LongValue ids
        Graph<LongValue, NullValue, NullValue> simplified =
            generated.run(new Simplify<LongValue, NullValue, NullValue>());
        hits = simplified.run(
            new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
      } else {
        // translate ids to IntValue before simplifying and running HITS
        Graph<IntValue, NullValue, NullValue> translated = generated.run(
            new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()));
        Graph<IntValue, NullValue, NullValue> simplified =
            translated.run(new Simplify<IntValue, NullValue, NullValue>());
        hits = simplified.run(
            new org.apache.flink.graph.library.link_analysis.HITS<IntValue, NullValue, NullValue>(iterations));
      }
      break;
    }
    default:
      throw new ProgramParametrizationException(getUsage("invalid input type"));
  }

  final String outputMode = params.get("output", "");
  switch (outputMode) {
    case "print": {
      System.out.println();
      for (Object element : hits.collect()) {
        Result row = (Result) element;
        System.out.println(row.toPrintableString());
      }
      break;
    }
    case "hash": {
      System.out.println();
      System.out.println(DataSetUtils.checksumHashCode(hits));
      break;
    }
    case "csv": {
      String outputPath = params.getRequired("output_filename");
      String rowSeparator = StringEscapeUtils.unescapeJava(
          params.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String columnSeparator = StringEscapeUtils.unescapeJava(
          params.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      hits.writeAsCsv(outputPath, rowSeparator, columnSeparator);
      env.execute("HITS");
      break;
    }
    default:
      throw new ProgramParametrizationException(getUsage("invalid output type"));
  }

  // report the net runtime of the last executed job
  JobExecutionResult lastJob = env.getLastJobExecutionResult();
  NumberFormat runtimeFormat = NumberFormat.getInstance();
  System.out.println();
  System.out.println("Execution runtime: " + runtimeFormat.format(lastJob.getNetRuntime()) + " ms");
}
Aggregations