Usage of hex.CreateFrame in project h2o-3 (h2oai): class CreateFrameHandler, method run.
/**
 * REST handler: launches a CreateFrame job for the given request schema.
 * If the request carries no destination key, a random one is generated first.
 */
public JobV3 run(int version, CreateFrameV3 cf) {
  // Caller did not name a destination frame -> synthesize a random key.
  if (cf.dest == null) {
    final KeyV3.FrameKeyV3 dest = new KeyV3.FrameKeyV3();
    dest.name = Key.rand();
    cf.dest = dest;
  }
  final CreateFrame impl = new CreateFrame(cf.dest.key());
  cf.fillImpl(impl);
  return new JobV3(impl.execImpl());
}
Usage of hex.CreateFrame in project h2o-3 (h2oai): class ChunkSummaryTest, method run.
/**
 * Builds a small random frame, renders its chunk-type and distribution
 * summaries, and exercises JSON serialization of both tables.
 */
@Test
public void run() {
  CreateFrame maker = new CreateFrame();
  maker.seed = 1234;
  Frame frame = maker.execImpl().get();

  ChunkSummary summary = FrameUtils.chunkSummary(frame);

  TwoDimTable chunkTypes = summary.toTwoDimTableChunkTypes();
  Log.info(chunkTypes);
  String json = new TwoDimTableV3().fillFromImpl(chunkTypes).toJsonString();
  // NOTE(review): exact-JSON equality assertions were previously disabled here;
  // the serialized output depends on the environment (cloud size, node
  // addresses), so this test only checks that serialization completes.

  TwoDimTable distribution = summary.toTwoDimTableDistribution();
  Log.info(distribution);
  json = new TwoDimTableV3().fillFromImpl(distribution).toJsonString();

  frame.remove();
}
Usage of hex.CreateFrame in project h2o-3 (h2oai): class AggregatorTest, method testAggregatorEigen.
/**
 * Trains an Aggregator with Eigen categorical encoding on a synthetic frame
 * that is 60% categorical, then verifies model consistency and cleans up.
 */
@Test
public void testAggregatorEigen() {
  // Synthesize 1000x10 training data: mostly categoricals (5 factor levels),
  // remainder real-valued, no missing cells.
  CreateFrame maker = new CreateFrame();
  maker.rows = 1000;
  maker.cols = 10;
  maker.categorical_fraction = 0.6;
  maker.integer_fraction = 0.0;
  maker.binary_fraction = 0.0;
  maker.real_range = 100;
  maker.integer_range = 100;
  maker.missing_fraction = 0;
  maker.factors = 5;
  maker.seed = 1234;
  Frame train = maker.execImpl().get();

  AggregatorModel.AggregatorParameters params = new AggregatorModel.AggregatorParameters();
  params._train = train._key;
  params._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.Eigen;

  long startedAt = System.currentTimeMillis();
  // 0.905
  AggregatorModel model = new Aggregator(params).trainModel().get();
  System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - startedAt) / 1000. + " seconds");

  model.checkConsistency();
  Frame exemplars = model._output._output_frame.get();
  System.out.println(exemplars.toTwoDimTable(0, 10));
  Log.info("Number of exemplars: " + model._exemplars.length);

  exemplars.remove();
  train.remove();
  model.remove();
}
Usage of hex.CreateFrame in project h2o-3 (h2oai): class TimeSeriesTests, method setup.
/**
 * One-time fixture: waits for a single-node cloud, then materializes a tiny
 * 2x256 frame (all real-valued columns, no missing data) shared by the tests.
 * Assigns the class-level fields {@code cf} and {@code f}.
 */
@BeforeClass
public static void setup() {
  stall_till_cloudsize(1);
  cf = new CreateFrame();
  cf.seed = 123;
  cf.rows = 2;
  cf.cols = 256;
  // Every type fraction is zero, so all generated columns are real-valued.
  cf.categorical_fraction = 0.0;
  cf.integer_fraction = 0.0;
  cf.binary_fraction = 0.0;
  cf.binary_ones_fraction = 0.0;
  cf.missing_fraction = 0.0;
  f = cf.execImpl().get();
}
Usage of hex.CreateFrame in project h2o-3 (h2oai): class GroupingBench, method runGroupingBench.
/**
 * Micro-benchmark for per-chunk range counting over a very large Vec (1e9 rows).
 * Populates the vector with small random integers, prints chunk placement, builds
 * a small auxiliary frame, then times {@code MyCountRange} over three repetitions.
 * The cumulate/write-order phases are kept commented out for future profiling.
 *
 * Fix: the chunk-placement log line previously concatenated without spaces,
 * printing e.g. "Chunk0is on/1.2.3.4:54321"; spaces restored for readability.
 */
@Ignore
@Test
public void runGroupingBench() {
  // Simplified version of tests in runit_quantile_1_golden.R. There we test probs=seq(0,1,by=0.01)
  //Vec vec = Vec.makeCon(1.1, 1000000000);
  //Vec vec = Vec.makeRepSeq(10,10);
  Vec vec = Vec.makeZero((long) 1e9);
  //System.out.println("Chunks: " + vec.nChunks());
  //System.out.println("Vec length: " + vec.length());
  //System.out.println("Populating vector... ");
  //new MySeq((int)100).doAll(vec);
  //new MySample((int)10).doAll(vec);
  // Fill with uniform random integers in [0, 10).
  new MySample((int) 10).doAll(vec);
  // Touch max() to cache rollups, so the timings below exclude rollup cost.
  vec.max();
  System.out.println("\nFirst 30 of vec ...");
  System.out.println("There are " + vec.nChunks() + " chunks");
  for (int i = 0; i < vec.nChunks(); i++) {
    System.out.println("Chunk " + i + " is on " + vec.chunkKey(i).home_node());
  }
  // Small mixed frame, mostly to exercise frame printing alongside the benchmark.
  CreateFrame cf = new CreateFrame();
  cf.rows = 100;
  cf.cols = 10;
  cf.categorical_fraction = 0.1;
  cf.integer_fraction = 1 - cf.categorical_fraction;
  cf.binary_fraction = 0;
  cf.factors = 4;
  cf.response_factors = 2;
  cf.positive_response = false;
  cf.has_response = true;
  cf.seed = 1234;
  Frame frame = cf.execImpl().get();
  System.out.print(frame.toString(0, 14));
  for (int i = 0; i < 30; i++) System.out.print((int) vec.at(i) + " ");
  System.out.println("\n");
  // Vec vec = vec(5 , 8 , 9 , 12 , 13 , 16 , 18 , 23 , 27 , 28 , 30 , 31 , 33 , 34 , 43, 45, 48, 161);
  // makeSeq;
  // Take out memory alloc before the loop to avoid GC costs, before vtune profiling
  // Now broken up into arrays of same shape as vec.chunks. Really cannot have one array of 1e9 items in Java.
  // nanos = System.nanoTime();
  long heapsize = Runtime.getRuntime().totalMemory();
  System.out.println("heapsize is::" + heapsize);
  //long o[] = new long[(int)vec.length()];
  // [(int)vec.length()];
  // Order buffer, shaped like the Vec's chunks (one int[] per chunk).
  int[][] o = new int[vec.nChunks()][];
  for (int c = 0; c < o.length; c++) o[c] = new int[vec.chunkForChunkIdx(c)._len];
  for (int timeRep = 0; timeRep < 3; timeRep++) {
    // TO DO: caliper java project
    // TO DO: search for utils.Timer, prettyPrint
    long nanos = System.nanoTime();
    long[][] ans2 = new MyCountRange((long) vec.max(), (long) vec.min(), vec.nChunks()).doAll(vec)._counts;
    long nanos1 = System.nanoTime() - nanos;
    System.out.println("Counts per chunk (first 5 chunks) ...");
    for (int c = 0; c < 5; c++) System.out.println(Arrays.toString(ans2[c]));
    /*
    nanos = System.nanoTime();
    // cumulate across chunks
    int nBuckets = (int)((long) vec.max() - (long) vec.min() + 1);
    long rollSum = 0;
    for (int b = 0; b < nBuckets; b++) {
    for (int c = 0; c < vec.nChunks(); c++) {
    long tmp = ans2[c][b];
    ans2[c][b] = rollSum;
    rollSum += tmp;
    }
    }
    long nanos2 = System.nanoTime() - nanos;
    //System.out.println("\nCounts after cumulate ...");
    //for (int c = 0; c < vec.nChunks(); c++) System.out.println(Arrays.toString(ans2[c]));
    nanos = System.nanoTime();
    new WriteOrder(ans2, o, (long) vec.min(), (long) vec.max()).doAll(vec);
    long nanos3 = System.nanoTime() - nanos;
    //System.out.println("\nCounts after WriteOrder ...");
    //for (int c = 0; c < vec.nChunks(); c++) System.out.println(Arrays.toString(ans2[c]));
    System.out.println("\nFirst 10 of order ...");
    //for (int i=0; i<10; i++) System.out.print(o[i] + " ");
    for (int i=0; i<10; i++) System.out.print(o[0][i] + " ");
    System.out.println("\nLast 10 of order ...");
    //for (int i=9; i>=0; i--) System.out.print(o[(int)(vec.length()-i-1)] + " "); System.out.print("\n");
    int c = vec.nChunks()-1;
    long cstart = vec._espc[c];
    for (int i=9; i>=0; i--) System.out.print(o[c][(int)(vec.length()-i-1-cstart)] + " "); System.out.print("\n");
    System.out.println("\nFirst 40 of vec ...");
    for (int i=0; i<40; i++) System.out.print((int)vec.at(i) + " ");
    System.out.println("\nLast 40 of vec ...");
    for (int i=39; i>=0; i--) System.out.print((int)vec.at((int)vec.length()-i-1) + " "); System.out.print("\n");
    */
    System.out.println("\nInitial count: " + nanos1 / 1e9);
    //System.out.println("Cumulate across chunks: " + nanos2 / 1e9);
    //System.out.println("Write to order[]: " + nanos3 / 1e9);
    //System.out.println("Total time: " + (nanos1+nanos2+nanos3) / 1e9);
    System.out.println("");
  }
  // Next: input int, then large groups, small groups
  vec.remove();
  frame.delete();
}
Aggregations