use of org.apache.sysml.runtime.instructions.cp.CPInstruction.CPINSTRUCTION_TYPE in project incubator-systemml by apache.
the class CostEstimatorStaticRuntime method getNFLOP.
/**
 * Estimates the number of floating point operations of a single instruction
 * on matrix block level, for CP instruction types first and MR-specific
 * instruction types second. IO and parallelism are not part of the estimate.
 *
 * <p>Each operand is described by a (rows, cols, sparsity) triple. Negative
 * dimensions are interpreted as "operand not present" (see {@code onlyLeft}
 * and {@code allExists}). The third triple is used by the cost formulas as
 * the output characteristics.
 *
 * @param optype instruction opcode, looked up in the CP and then the MR opcode-to-type maps
 * @param inMR   only consulted for {@code Partition}: if false, write costs are added
 * @param d1m    rows of the first operand
 * @param d1n    columns of the first operand
 * @param d1s    sparsity of the first operand (multiplied with {@code d1m*d1n} to scale costs)
 * @param d2m    rows of the second operand (negative if absent)
 * @param d2n    columns of the second operand (negative if absent)
 * @param d2s    sparsity of the second operand
 * @param d3m    rows of the third operand / output
 * @param d3n    columns of the third operand / output
 * @param d3s    sparsity of the third operand / output
 * @param args   opcode-specific arguments; {@code args[0]} is parsed for cm, rand,
 *               groupedagg, rmempty, write, tsmm and the MR aggregate opcodes
 * @return the estimated FLOP count; 0 for opcodes without a cost formula inside a
 *         known type; -1 for the MR {@code PickByCount} type
 * @throws DMLRuntimeException if the opcode maps to a CP type that is not handled
 *         here, or maps to neither a CP nor an MR instruction type
 */
private double getNFLOP(String optype, boolean inMR, long d1m, long d1n, double d1s, long d2m, long d2n, double d2s, long d3m, long d3n, double d3s, String[] args) throws DMLRuntimeException {
    //operation costs in FLOP on matrix block level (for CP and MR instructions)
    //(excludes IO and parallelism; assumes known dims for all inputs, outputs )
    boolean leftSparse = MatrixBlock.evalSparseFormatInMemory(d1m, d1n, (long) (d1s * d1m * d1n));
    boolean rightSparse = MatrixBlock.evalSparseFormatInMemory(d2m, d2n, (long) (d2s * d2m * d2n));
    boolean onlyLeft = (d1m >= 0 && d1n >= 0 && d2m < 0 && d2n < 0);
    boolean allExists = (d1m >= 0 && d1n >= 0 && d2m >= 0 && d2n >= 0 && d3m >= 0 && d3n >= 0);

    //NOTE: all instruction types that are equivalent in CP and MR are only
    //included in CP to prevent redundancy
    CPINSTRUCTION_TYPE cptype = CPInstructionParser.String2CPInstructionType.get(optype);
    if (cptype != null) { //for CP Ops and equivalent MR ops
        //general approach: count of floating point *, /, +, -, ^, builtin ;
        switch (cptype) {
            case AggregateBinary: //opcodes: ba+*, cov
                if (optype.equals("ba+*")) {
                    //average flop count
                    if (!leftSparse && !rightSparse)
                        return 2 * (d1m * d1n * ((d2n > 1) ? d1s : 1.0) * d2n) / 2;
                    else if (!leftSparse && rightSparse)
                        return 2 * (d1m * d1n * d1s * d2n * d2s) / 2;
                    else if (leftSparse && !rightSparse)
                        return 2 * (d1m * d1n * d1s * d2n) / 2;
                    else
                        //leftSparse && rightSparse
                        return 2 * (d1m * d1n * d1s * d2n * d2s) / 2;
                } else if (optype.equals("cov")) {
                    //(11+3*k+)
                    return 23 * d1m;
                }
                return 0;

            case MMChain:
                //(mmchain essentially two matrix-vector multiplications)
                if (!leftSparse)
                    return (2 + 2) * (d1m * d1n) / 2;
                else
                    return (2 + 2) * (d1m * d1n * d1s) / 2;

            case AggregateTernary: //opcodes: tak+*
                //2*1(*) + 4 (k+)
                return 6 * d1m * d1n;

            case AggregateUnary: //opcodes: uak+, uark+, uack+, uasqk+, uarsqk+, uacsqk+,
                if (optype.equals("nrow") || optype.equals("ncol") || optype.equals("length"))
                    return DEFAULT_NFLOP_NOOP;
                else if (optype.equals("cm")) {
                    //per-row cost factor selected by the moment type in args[0]
                    double xcm = 1;
                    switch (Integer.parseInt(args[0])) {
                        case 0: //count
                            xcm = 1;
                            break;
                        case 1: //mean
                            xcm = 8;
                            break;
                        case 2: //cm2
                            xcm = 16;
                            break;
                        case 3: //cm3
                            xcm = 31;
                            break;
                        case 4: //cm4
                            xcm = 51;
                            break;
                        case 5: //variance
                            xcm = 16;
                            break;
                    }
                    return (leftSparse) ? xcm * (d1m * d1s + 1) : xcm * d1m;
                } else if (optype.equals("uatrace") || optype.equals("uaktrace"))
                    return 2 * d1m * d1n;
                else if (optype.equals("ua+") || optype.equals("uar+") || optype.equals("uac+")) {
                    //sparse safe operations
                    if (!leftSparse) //dense
                        return d1m * d1n;
                    else //sparse
                        return d1m * d1n * d1s;
                } else if (optype.equals("uak+") || optype.equals("uark+") || optype.equals("uack+"))
                    //1*k+
                    return 4 * d1m * d1n;
                else if (optype.equals("uasqk+") || optype.equals("uarsqk+") || optype.equals("uacsqk+"))
                    // +1 for multiplication to square term
                    return 5 * d1m * d1n;
                else if (optype.equals("uamean") || optype.equals("uarmean") || optype.equals("uacmean"))
                    //1*k+
                    return 7 * d1m * d1n;
                else if (optype.equals("uavar") || optype.equals("uarvar") || optype.equals("uacvar"))
                    return 14 * d1m * d1n;
                else if (optype.equals("uamax") || optype.equals("uarmax") || optype.equals("uacmax") || optype.equals("uamin") || optype.equals("uarmin") || optype.equals("uacmin") || optype.equals("uarimax") || optype.equals("ua*"))
                    return d1m * d1n;
                return 0;

            case ArithmeticBinary: //opcodes: +, -, *, /, ^ (incl. ^2, *2)
                //note: covers scalar-scalar, scalar-matrix, matrix-matrix
                //The explicit parentheses matter: && binds tighter than ||, so
                //without them "+" would take the sparse-safe formula even for
                //two dense inputs while "-" would not.
                if ((optype.equals("+") || optype.equals("-")) //sparse safe
                        && (leftSparse || rightSparse))
                    return d1m * d1n * d1s + d2m * d2n * d2s;
                else
                    return d3m * d3n;

            case Ternary: //opcodes: ctable
                if (optype.equals("ctable")) {
                    if (leftSparse)
                        //add
                        return d1m * d1n * d1s;
                    else
                        return d1m * d1n;
                }
                return 0;

            case BooleanBinary: //opcodes: &&, ||
                //always scalar-scalar
                return 1;

            case BooleanUnary: //opcodes: !
                //always scalar-scalar
                return 1;

            case Builtin: //opcodes: log
                //note: can be unary or binary
                if (allExists) //binary
                    return 3 * d3m * d3n;
                else //unary
                    return d3m * d3n;

            case BuiltinBinary: //opcodes: max, min, solve
                //note: covers scalar-scalar, scalar-matrix, matrix-matrix
                if (optype.equals("solve")) //see also MultiReturnBuiltin
                    //for 1kx1k ~ 1GFLOP -> 0.5s
                    //computed in double: the cubic long product would overflow
                    //silently for large dimensions
                    return (double) d1m * d1n * d1n;
                else //default
                    return d3m * d3n;

            case BuiltinUnary: //opcodes: exp, abs, sin, cos, tan, sign, sqrt, plogp, print, round, sprop, sigmoid
                //TODO add cost functions for commons math builtins: inverse, cholesky
                if (optype.equals("print")) //scalar only
                    return 1;
                else {
                    //default for all ops
                    double xbu = 1;
                    if (optype.equals("plogp"))
                        xbu = 2;
                    else if (optype.equals("round"))
                        xbu = 4;
                    if (optype.equals("sin") || optype.equals("tan") || optype.equals("round") || optype.equals("abs") || optype.equals("sqrt") || optype.equals("sprop") || optype.equals("sigmoid") || optype.equals("sign")) { //sparse-safe
                        if (leftSparse) //sparse
                            return xbu * d1m * d1n * d1s;
                        else //dense
                            return xbu * d1m * d1n;
                    } else
                        return xbu * d1m * d1n;
                }

            case Reorg: //opcodes: r', rdiag
            case MatrixReshape: //opcodes: rshape
                if (leftSparse)
                    return d1m * d1n * d1s;
                else
                    return d1m * d1n;

            case Append: //opcodes: append
                return DEFAULT_NFLOP_CP * (((leftSparse) ? d1m * d1n * d1s : d1m * d1n) + ((rightSparse) ? d2m * d2n * d2s : d2m * d2n));

            case RelationalBinary: //opcodes: ==, !=, <, >, <=, >=
                //note: all relational ops are not sparsesafe
                return d3m * d3n; //covers all combinations of scalar and matrix

            case File: //opcodes: rm, mv
                return DEFAULT_NFLOP_NOOP;

            case Variable: //opcodes: assignvar, cpvar, rmvar, rmfilevar, assignvarwithfile, attachfiletovar, valuepick, iqsize, read, write, createvar, setfilename, castAsMatrix
                if (optype.equals("write")) {
                    boolean text = args[0].equals("textcell") || args[0].equals("csv");
                    double xwrite = text ? DEFAULT_NFLOP_TEXT_IO : DEFAULT_NFLOP_CP;
                    if (!leftSparse)
                        return d1m * d1n * xwrite;
                    else
                        return d1m * d1n * d1s * xwrite;
                } else if (optype.equals("inmem-iqm"))
                    //note: assumes uniform distribution
                    return 2 * d1m + 5 + //sum of weights
                            0.25d * d1m + //scan to lower quantile
                            8 * 0.5 * d1m; //scan from lower to upper quantile
                else
                    return DEFAULT_NFLOP_NOOP;

            case Rand: //opcodes: rand, seq
                if (optype.equals(DataGen.RAND_OPCODE)) {
                    //per random number
                    int nflopRand = 32;
                    switch (Integer.parseInt(args[0])) {
                        case 0: //empty matrix
                            return DEFAULT_NFLOP_NOOP;
                        case 1: //allocate, arrayfill
                            return d3m * d3n * 8;
                        case 2: //full rand
                        {
                            if (d3s == 1.0)
                                //DENSE gen (incl allocate)
                                return d3m * d3n * nflopRand + d3m * d3n * 8;
                            else
                                return (d3s >= MatrixBlock.SPARSITY_TURN_POINT) ?
                                        2 * d3m * d3n * nflopRand + d3m * d3n * 8 : //DENSE gen (incl allocate)
                                        3 * d3m * d3n * d3s * nflopRand + d3m * d3n * d3s * 24; //SPARSE gen (incl allocate)
                        }
                    }
                } else
                    //seq
                    return d3m * d3n * DEFAULT_NFLOP_CP;
                //NOTE(review): a rand opcode with args[0] outside {0,1,2} falls
                //through into the StringInit formula below - confirm this is intended.

            case StringInit: //sinit
                return d3m * d3n * DEFAULT_NFLOP_CP;

            case External: //opcodes: extfunct
                //note: should be invoked independently for multiple outputs
                return d1m * d1n * d1s * DEFAULT_NFLOP_UNKNOWN;

            case MultiReturnBuiltin: //opcodes: qr, lu, eigen
                //note: they all have cubic complexity, the scaling factor refers to commons.math
                //default e.g, qr
                double xf = 2;
                if (optype.equals("eigen"))
                    xf = 32;
                else if (optype.equals("lu"))
                    xf = 16;
                //for 1kx1k ~ 2GFLOP -> 1s
                return xf * d1m * d1n * d1n;

            case ParameterizedBuiltin: //opcodes: cdf, invcdf, groupedagg, rmempty
                if (optype.equals("cdf") || optype.equals("invcdf"))
                    //scalar call to commons.math
                    return DEFAULT_NFLOP_UNKNOWN;
                else if (optype.equals("groupedagg")) {
                    //per-row cost factor selected by the aggregation type in args[0]
                    double xga = 1;
                    switch (Integer.parseInt(args[0])) {
                        case 0: //sum, see uk+
                            xga = 4;
                            break;
                        case 1: //count, see cm
                            xga = 1;
                            break;
                        case 2: //mean
                            xga = 8;
                            break;
                        case 3: //cm2
                            xga = 16;
                            break;
                        case 4: //cm3
                            xga = 31;
                            break;
                        case 5: //cm4
                            xga = 51;
                            break;
                        case 6: //variance
                            xga = 16;
                            break;
                    }
                    //scan for min/max, groupedagg
                    return 2 * d1m + xga * d1m;
                } else if (optype.equals("rmempty")) {
                    switch (Integer.parseInt(args[0])) {
                        case 0: //remove rows
                            return ((leftSparse) ? d1m : d1m * Math.ceil(1.0d / d1s) / 2) + DEFAULT_NFLOP_CP * d3m * d2m;
                        case 1: //remove cols
                            return d1n * Math.ceil(1.0d / d1s) / 2 + DEFAULT_NFLOP_CP * d3m * d2m;
                    }
                }
                return 0;

            case QSort: //opcodes: sort
                if (optype.equals("sort")) {
                    //note: mergesort since comparator used
                    double sortCosts = 0;
                    if (onlyLeft)
                        sortCosts = DEFAULT_NFLOP_CP * d1m + d1m;
                    else
                        //w/ weights
                        sortCosts = DEFAULT_NFLOP_CP * ((leftSparse) ? d1m * d1s : d1m);
                    return sortCosts + d1m * (int) (Math.log(d1m) / Math.log(2)) + //mergesort
                            DEFAULT_NFLOP_CP * d1m;
                }
                return 0;

            case MatrixIndexing: //opcodes: rangeReIndex, leftIndex
                if (optype.equals("leftIndex")) {
                    return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m * d1n * d1s : d1m * d1n) + 2 * DEFAULT_NFLOP_CP * ((rightSparse) ? d2m * d2n * d2s : d2m * d2n);
                } else if (optype.equals("rangeReIndex")) {
                    //NOTE(review): tests leftSparse but scales by the second
                    //operand's dimensions - confirm which operand is meant.
                    return DEFAULT_NFLOP_CP * ((leftSparse) ? d2m * d2n * d2s : d2m * d2n);
                }
                return 0;

            case MMTSJ: //opcodes: tsmm
                //average flop count
                if (MMTSJType.valueOf(args[0]).isLeft()) {
                    //lefttranspose
                    //NOTE(review): branches on rightSparse although only the
                    //first operand's dimensions are used - confirm intent.
                    if (!rightSparse) //dense
                        return d1m * d1n * d1s * d1n / 2;
                    else //sparse
                        return d1m * d1n * d1s * d1n * d1s / 2;
                } else if (onlyLeft) {
                    //righttranspose
                    if (!leftSparse) //dense
                        return (double) d1m * d1n * d1m / 2;
                    else //sparse
                        return d1m * d1n * d1s + //reorg sparse
                                d1m * d1n * d1s * d1n * d1s / 2; //core tsmm
                }
                return 0;

            case Partition:
                return d1m * d1n * d1s + //partitioning costs
                        (inMR ? 0 : //include write cost if in CP
                                getHDFSWriteTime(d1m, d1n, d1s) * DEFAULT_FLOPS);

            case INVALID:
                return 0;

            default:
                throw new DMLRuntimeException("CostEstimator: unsupported instruction type: " + optype);
        }
    }

    //if not found in CP instructions
    MRINSTRUCTION_TYPE mrtype = MRInstructionParser.String2MRInstructionType.get(optype);
    if (mrtype != null) { //for specific MR ops
        switch (mrtype) {
            case Aggregate: //opcodes: a+, ak+, asqk+, a*, amax, amin, amean
                //TODO should be aggregate unary
                int numMap = Integer.parseInt(args[0]);
                if (optype.equals("ak+"))
                    return 4 * numMap * d1m * d1n * d1s;
                else if (optype.equals("asqk+"))
                    // +1 for multiplication to square term
                    return 5 * numMap * d1m * d1n * d1s;
                else if (optype.equals("avar"))
                    return 14 * numMap * d1m * d1n * d1s;
                else
                    return numMap * d1m * d1n * d1s;

            case AggregateBinary: //opcodes: cpmm, rmm, mapmult
                //note: copy from CP costs
                if (optype.equals("cpmm") || optype.equals("rmm") || optype.equals(MapMult.OPCODE)) { //matrix mult
                    //average flop count
                    if (!leftSparse && !rightSparse)
                        return 2 * (d1m * d1n * ((d2n > 1) ? d1s : 1.0) * d2n) / 2;
                    else if (!leftSparse && rightSparse)
                        return 2 * (d1m * d1n * d1s * d2n * d2s) / 2;
                    else if (leftSparse && !rightSparse)
                        return 2 * (d1m * d1n * d1s * d2n) / 2;
                    else
                        //leftSparse && rightSparse
                        return 2 * (d1m * d1n * d1s * d2n * d2s) / 2;
                }
                return 0;

            case MapMultChain: //opcodes: mapmultchain
                //assume dense input2 and input3
                return 2 * d1m * d2n * d1n * ((d2n > 1) ? d1s : 1.0) + //ba(+*)
                        d1m * d2n + //cellwise b(*)
                        d1m * d2n + //r(t)
                        2 * d2n * d1n * d1m * (leftSparse ? d1s : 1.0) + //ba(+*)
                        d2n * d1n; //r(t)

            case ArithmeticBinary: //opcodes: s-r, so, max, min,
                //note: all relational ops are not sparsesafe
                return d3m * d3n; //covers all combinations of scalar and matrix

            case CombineUnary: //opcodes: combineunary
                return d1m * d1n * d1s;

            case CombineBinary: //opcodes: combinebinary
                return d1m * d1n * d1s + d2m * d2n * d2s;

            case CombineTernary: //opcodes: combinetertiary
                return d1m * d1n * d1s + d2m * d2n * d2s + d3m * d3n * d3s;

            case Unary: //opcodes: log, slog, pow
                //note: covers scalar, matrix, matrix-scalar
                return d3m * d3n;

            case Ternary: //opcodes: ctabletransform, ctabletransformscalarweight, ctabletransformhistogram, ctabletransformweightedhistogram
                //note: copy from cp
                if (leftSparse)
                    //add
                    return d1m * d1n * d1s;
                else
                    return d1m * d1n;

            case Quaternary:
                //TODO pattern specific and all 4 inputs requires
                return d1m * d1n * d1s * 4;

            case Reblock: //opcodes: rblk
                return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m * d1n * d1s : d1m * d1n);

            case Replicate: //opcodes: rblk
                return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m * d1n * d1s : d1m * d1n);

            case CM_N_COV: //opcodes: mean
                double xcm = 8;
                return (leftSparse) ? xcm * (d1m * d1s + 1) : xcm * d1m;

            case GroupedAggregate: //opcodes: groupedagg
                //TODO: need to consolidate categories (ParameterizedBuiltin)
                //copy from CP operation
                double xga = 1;
                switch (Integer.parseInt(args[0])) {
                    case 0: //sum, see uk+
                        xga = 4;
                        break;
                    case 1: //count, see cm
                        xga = 1;
                        break;
                    case 2: //mean
                        xga = 8;
                        break;
                    case 3: //cm2
                        xga = 16;
                        break;
                    case 4: //cm3
                        xga = 31;
                        break;
                    case 5: //cm4
                        xga = 51;
                        break;
                    case 6: //variance
                        xga = 16;
                        break;
                }
                //scan for min/max, groupedagg
                return 2 * d1m + xga * d1m;

            case PickByCount: //opcodes: valuepick, rangepick
                //no cost formula: leaves the switch and yields the -1 below
                break;

            case RangeReIndex: //opcodes: rangeReIndex, rangeReIndexForLeft
                //TODO: requires category consolidation
                if (optype.equals("rangeReIndex"))
                    return DEFAULT_NFLOP_CP * ((leftSparse) ? d2m * d2n * d2s : d2m * d2n);
                else
                    //rangeReIndexForLeft
                    return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m * d1n * d1s : d1m * d1n) + DEFAULT_NFLOP_CP * ((rightSparse) ? d2m * d2n * d2s : d2m * d2n);

            case ZeroOut: //opcodes: zeroOut
                return DEFAULT_NFLOP_CP * ((leftSparse) ? d1m * d1n * d1s : d1m * d1n) + DEFAULT_NFLOP_CP * ((rightSparse) ? d2m * d2n * d2s : d2m * d2n);

            default:
                return 0;
        }
    } else {
        throw new DMLRuntimeException("CostEstimator: unsupported instruction type: " + optype);
    }

    //only reached when an MR case leaves the switch via break (PickByCount)
    return -1;
}
use of org.apache.sysml.runtime.instructions.cp.CPInstruction.CPINSTRUCTION_TYPE in project incubator-systemml by apache.
the class CPInstructionParser method parseSingleInstruction.
/**
 * Parses one CP instruction string into its instruction object.
 *
 * @param str the instruction string; may be null or empty
 * @return the parsed instruction, or null if {@code str} is null or empty
 * @throws DMLRuntimeException if no CP instruction type can be derived from
 *         {@code str}, or if the type-specific parser yields no instruction
 */
public static CPInstruction parseSingleInstruction(String str) throws DMLRuntimeException {
    // nothing to parse: signal absence with null instead of failing
    boolean hasContent = (str != null) && !str.isEmpty();
    if (!hasContent) {
        return null;
    }

    // step 1: determine the instruction type from the raw string
    final CPINSTRUCTION_TYPE type = InstructionUtils.getCPType(str);
    if (null == type) {
        throw new DMLRuntimeException("Unable derive cptype for instruction: " + str);
    }

    // step 2: delegate to the type-specific overload
    final CPInstruction parsed = parseSingleInstruction(type, str);
    if (null == parsed) {
        throw new DMLRuntimeException("Unable to parse instruction: " + str);
    }

    return parsed;
}
Aggregations