use of java.text.NumberFormat in project flink by apache.
the class JaccardIndex method main.
public static void main(String[] args) throws Exception {
    // Set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    ParameterTool parameters = ParameterTool.fromArgs(args);
    env.getConfig().setGlobalJobParameters(parameters);

    int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);

    DataSet ji;

    switch (parameters.get("input", "")) {
        case "csv": {
            String lineDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
            String fieldDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

            GraphCsvReader reader = Graph
                .fromCsvReader(parameters.getRequired("input_filename"), env)
                .ignoreCommentsEdges("#")
                .lineDelimiterEdges(lineDelimiter)
                .fieldDelimiterEdges(fieldDelimiter);

            switch (parameters.get("type", "")) {
                case "integer": {
                    Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

                    if (parameters.getBoolean("simplify", false)) {
                        graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false)
                            .setParallelism(little_parallelism));
                    }

                    ji = graph.run(new org.apache.flink.graph.library.similarity.JaccardIndex<LongValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                }
                break;

                case "string": {
                    Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

                    if (parameters.getBoolean("simplify", false)) {
                        graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false)
                            .setParallelism(little_parallelism));
                    }

                    ji = graph.run(new org.apache.flink.graph.library.similarity.JaccardIndex<StringValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
                }
                break;

                default:
                    throw new ProgramParametrizationException(getUsage("invalid CSV type"));
            }
        }
        break;

        case "rmat": {
            int scale = parameters.getInt("scale", DEFAULT_SCALE);
            int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

            RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

            long vertexCount = 1L << scale;
            long edgeCount = vertexCount * edgeFactor;

            Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount)
                .setParallelism(little_parallelism)
                .generate();

            boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

            if (scale > 32) {
                ji = graph
                    .run(new Simplify<LongValue, NullValue, NullValue>(clipAndFlip)
                        .setParallelism(little_parallelism))
                    .run(new org.apache.flink.graph.library.similarity.JaccardIndex<LongValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
            } else {
                ji = graph
                    .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
                        .setParallelism(little_parallelism))
                    .run(new Simplify<IntValue, NullValue, NullValue>(clipAndFlip)
                        .setParallelism(little_parallelism))
                    .run(new org.apache.flink.graph.library.similarity.JaccardIndex<IntValue, NullValue, NullValue>()
                        .setLittleParallelism(little_parallelism));
            }
        }
        break;

        default:
            throw new ProgramParametrizationException(getUsage("invalid input type"));
    }

    switch (parameters.get("output", "")) {
        case "print":
            System.out.println();
            for (Object e : ji.collect()) {
                Result result = (Result) e;
                System.out.println(result.toPrintableString());
            }
            break;

        case "hash":
            System.out.println();
            System.out.println(DataSetUtils.checksumHashCode(ji));
            break;

        case "csv":
            String filename = parameters.getRequired("output_filename");
            String lineDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
            String fieldDelimiter = StringEscapeUtils.unescapeJava(
                parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

            ji.writeAsCsv(filename, lineDelimiter, fieldDelimiter);
            env.execute("Jaccard Index");
            break;

        default:
            throw new ProgramParametrizationException(getUsage("invalid output type"));
    }

    JobExecutionResult result = env.getLastJobExecutionResult();

    NumberFormat nf = NumberFormat.getInstance();
    System.out.println();
    System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
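NumberFormat only appears in the last two lines of this driver: the default-locale instance adds grouping separators to the net runtime in milliseconds. A minimal standalone sketch of just that pattern, with a made-up runtime value:

public class RuntimeReport {
    public static void main(String[] args) {
        // Hypothetical value; in the Flink driver this comes from
        // JobExecutionResult.getNetRuntime().
        long netRuntimeMillis = 1234567L;
        java.text.NumberFormat nf = java.text.NumberFormat.getInstance();
        // Prints "Execution runtime: 1,234,567 ms" in an English locale.
        System.out.println("Execution runtime: " + nf.format(netRuntimeMillis) + " ms");
    }
}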
use of java.text.NumberFormat in project hadoop by apache.
the class StoragePolicySummary method toString.
public String toString() {
    StringBuilder compliantBlocksSB = new StringBuilder();
    compliantBlocksSB.append("\nBlocks satisfying the specified storage policy:");
    compliantBlocksSB.append("\nStorage Policy                  # of blocks       % of blocks\n");
    StringBuilder nonCompliantBlocksSB = new StringBuilder();
    Formatter compliantFormatter = new Formatter(compliantBlocksSB);
    Formatter nonCompliantFormatter = new Formatter(nonCompliantBlocksSB);
    NumberFormat percentFormat = NumberFormat.getPercentInstance();
    percentFormat.setMinimumFractionDigits(4);
    percentFormat.setMaximumFractionDigits(4);
    for (Map.Entry<StorageTypeAllocation, Long> storageComboCount : sortByComparator(storageComboCounts)) {
        double percent = (double) storageComboCount.getValue() / (double) totalBlocks;
        StorageTypeAllocation sta = storageComboCount.getKey();
        if (sta.policyMatches()) {
            compliantFormatter.format("%-25s %10d %20s%n",
                sta.getStoragePolicyDescriptor(),
                storageComboCount.getValue(),
                percentFormat.format(percent));
        } else {
            if (nonCompliantBlocksSB.length() == 0) {
                nonCompliantBlocksSB.append("\nBlocks NOT satisfying the specified storage policy:");
                nonCompliantBlocksSB.append("\nStorage Policy                      ");
                nonCompliantBlocksSB.append("Specified Storage Policy  # of blocks       % of blocks\n");
            }
            nonCompliantFormatter.format("%-35s %-20s %10d %20s%n",
                sta.getStoragePolicyDescriptor(),
                sta.getSpecifiedStoragePolicy().getName(),
                storageComboCount.getValue(),
                percentFormat.format(percent));
        }
    }
    if (nonCompliantBlocksSB.length() == 0) {
        nonCompliantBlocksSB.append("\nAll blocks satisfy specified storage policy.\n");
    }
    compliantFormatter.close();
    nonCompliantFormatter.close();
    return compliantBlocksSB.toString() + nonCompliantBlocksSB;
}
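The percent instance above is configured for exactly four fraction digits, so block ratios line up in fixed-width report columns. A self-contained sketch of that formatting, with made-up counts:

public class PercentColumns {
    public static void main(String[] args) {
        java.text.NumberFormat percentFormat = java.text.NumberFormat.getPercentInstance();
        percentFormat.setMinimumFractionDigits(4);
        percentFormat.setMaximumFractionDigits(4);
        long blocks = 42;        // hypothetical block count
        long totalBlocks = 1000; // hypothetical total
        double percent = (double) blocks / (double) totalBlocks;
        // Prints "4.2000%" in an English locale.
        System.out.println(percentFormat.format(percent));
    }
}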
use of java.text.NumberFormat in project hadoop by apache.
the class WeightSelector method configureOperations.
/**
 * Sets up the operations using the given configuration by determining the
 * number of operations to perform (and how many are left) and creating the
 * operation objects to be used throughout selection.
 *
 * @param cfg
 *          the ConfigExtractor to read operation settings from
 */
private void configureOperations(ConfigExtractor cfg) {
    operations = new TreeMap<OperationType, OperationInfo>();
    Map<OperationType, OperationData> opinfo = cfg.getOperations();
    int totalAm = cfg.getOpCount();
    int opsLeft = totalAm;
    NumberFormat formatter = Formatter.getPercentFormatter();
    for (final OperationType type : opinfo.keySet()) {
        OperationData opData = opinfo.get(type);
        OperationInfo info = new OperationInfo();
        info.distribution = opData.getDistribution();
        int amLeft = determineHowMany(totalAm, opData, type);
        opsLeft -= amLeft;
        LOG.info(type.name() + " has " + amLeft + " initial operations out of "
            + totalAm + " for its ratio " + formatter.format(opData.getPercent()));
        info.amountLeft = amLeft;
        Operation op = factory.getOperation(type);
        // wrap the operation in an observer so that its remaining amount is
        // decremented when the operation finishes
        if (op != null) {
            Observer fn = new Observer() {
                public void notifyFinished(Operation op) {
                    OperationInfo opInfo = operations.get(type);
                    if (opInfo != null) {
                        --opInfo.amountLeft;
                    }
                }

                public void notifyStarting(Operation op) {
                }
            };
            info.operation = new ObserveableOp(op, fn);
            operations.put(type, info);
        }
    }
    if (opsLeft > 0) {
        LOG.info(opsLeft + " left over operations found (due to inability to support partial operations)");
    }
}
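The formatter here comes from SLive's own Formatter helper rather than being built inline; assuming that helper wraps NumberFormat.getPercentInstance() (a reasonable reading of its name, not something this snippet shows), the log line reduces to a pattern like this sketch with made-up numbers:

public class RatioLog {
    public static void main(String[] args) {
        // Hypothetical stand-in for Formatter.getPercentFormatter(); its exact
        // fraction-digit settings are an assumption here.
        java.text.NumberFormat formatter = java.text.NumberFormat.getPercentInstance();
        int totalAm = 100;
        int amLeft = 35;
        double ratio = 0.35;
        // Prints "READ has 35 initial operations out of 100 for its ratio 35%".
        System.out.println("READ has " + amLeft + " initial operations out of "
            + totalAm + " for its ratio " + formatter.format(ratio));
    }
}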
use of java.text.NumberFormat in project hadoop by apache.
the class ConfigExtractor method dumpOptions.
/**
 * Dumps out the given options for the given config extractor.
 *
 * @param cfg
 *          the config to write to the log
 */
static void dumpOptions(ConfigExtractor cfg) {
    if (cfg == null) {
        return;
    }
    LOG.info("Base directory = " + cfg.getBaseDirectory());
    LOG.info("Data directory = " + cfg.getDataPath());
    LOG.info("Output directory = " + cfg.getOutputPath());
    LOG.info("Result file = " + cfg.getResultFile());
    LOG.info("Grid queue = " + cfg.getQueueName());
    LOG.info("Should exit on first error = " + cfg.shouldExitOnFirstError());
    {
        String duration = "Duration = ";
        if (cfg.getDurationMilliseconds() == Integer.MAX_VALUE) {
            duration += "unlimited";
        } else {
            duration += cfg.getDurationMilliseconds() + " milliseconds";
        }
        LOG.info(duration);
    }
    LOG.info("Map amount = " + cfg.getMapAmount());
    LOG.info("Reducer amount = " + cfg.getReducerAmount());
    LOG.info("Operation amount = " + cfg.getOpCount());
    LOG.info("Total file limit = " + cfg.getTotalFiles());
    LOG.info("Total dir file limit = " + cfg.getDirSize());
    {
        String read = "Read size = ";
        if (cfg.shouldReadFullFile()) {
            read += "entire file";
        } else {
            read += cfg.getReadSize() + " bytes";
        }
        LOG.info(read);
    }
    {
        String write = "Write size = ";
        if (cfg.shouldWriteUseBlockSize()) {
            write += "blocksize";
        } else {
            write += cfg.getWriteSize() + " bytes";
        }
        LOG.info(write);
    }
    {
        String append = "Append size = ";
        if (cfg.shouldAppendUseBlockSize()) {
            append += "blocksize";
        } else {
            append += cfg.getAppendSize() + " bytes";
        }
        LOG.info(append);
    }
    {
        String bsize = "Block size = ";
        bsize += cfg.getBlockSize() + " bytes";
        LOG.info(bsize);
    }
    if (cfg.getRandomSeed() != null) {
        LOG.info("Random seed = " + cfg.getRandomSeed());
    }
    if (cfg.getSleepRange() != null) {
        LOG.info("Sleep range = " + cfg.getSleepRange() + " milliseconds");
    }
    LOG.info("Replication amount = " + cfg.getReplication());
    LOG.info("Operations are:");
    NumberFormat percFormatter = Formatter.getPercentFormatter();
    Map<OperationType, OperationData> operations = cfg.getOperations();
    for (OperationType type : operations.keySet()) {
        String name = type.name();
        LOG.info(name);
        OperationData opInfo = operations.get(type);
        LOG.info(" " + opInfo.getDistribution().name());
        if (opInfo.getPercent() != null) {
            LOG.info(" " + percFormatter.format(opInfo.getPercent()));
        } else {
            LOG.info(" ???");
        }
    }
}
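Note the null guard around opInfo.getPercent(): an operation can have a distribution but no configured ratio, in which case the formatter is skipped and "???" is logged instead. A compact sketch of that guard (names here are illustrative, not from the Hadoop source):

public class PercentOrUnknown {
    // Returns the formatted ratio, or "???" when none was configured.
    static String describe(Double percent, java.text.NumberFormat percFormatter) {
        return percent != null ? percFormatter.format(percent) : "???";
    }

    public static void main(String[] args) {
        java.text.NumberFormat pf = java.text.NumberFormat.getPercentInstance();
        System.out.println(describe(0.25, pf)); // "25%"
        System.out.println(describe(null, pf)); // "???"
    }
}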
use of java.text.NumberFormat in project hive by apache.
the class DefaultStateProvider method getId.
/**
 * Default implementation. Here, ids are generated randomly.
 */
@Override
public int getId() {
    NumberFormat numberFormat = NumberFormat.getInstance();
    // Zero-pad to at least five digits and disable grouping separators so
    // the formatted string parses back cleanly with Integer.parseInt.
    numberFormat.setMinimumIntegerDigits(5);
    numberFormat.setGroupingUsed(false);
    return Integer.parseInt(numberFormat.format(Math.abs(new Random().nextInt())));
}
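Here NumberFormat is used for its padding side effect rather than for display: setMinimumIntegerDigits(5) left-pads short numbers with zeros, and setGroupingUsed(false) keeps separators out of the string so Integer.parseInt can read it back. The parse discards the padding again, so the format/parse round trip leaves the value unchanged. A standalone sketch:

public class PaddedParse {
    public static void main(String[] args) {
        java.text.NumberFormat numberFormat = java.text.NumberFormat.getInstance();
        numberFormat.setMinimumIntegerDigits(5);
        numberFormat.setGroupingUsed(false);
        System.out.println(numberFormat.format(42));                   // "00042"
        System.out.println(Integer.parseInt(numberFormat.format(42))); // 42
    }
}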