Search in sources :

Example 6 with CreateFrame

use of hex.CreateFrame in project h2o-3 by h2oai.

the class CreateFrameHandler method run.

/**
 * Starts a CreateFrame job described by the given request schema.
 *
 * <p>If the request carries no destination key, a fresh {@code FrameKeyV3} with a
 * random name is stored on the schema before the job is built.
 *
 * @param version API version of the request; not read by this method
 * @param cf      request schema; its {@code dest} field is filled in when it is null
 * @return a {@code JobV3} wrapping whatever {@code CreateFrame.execImpl()} returns
 */
public JobV3 run(int version, CreateFrameV3 cf) {
    // Caller did not name a destination frame: generate a randomly named key for it.
    if (null == cf.dest) {
        KeyV3.FrameKeyV3 generated = new KeyV3.FrameKeyV3();
        cf.dest = generated;
        generated.name = Key.rand();
    }

    final CreateFrame builder = new CreateFrame(cf.dest.key());
    // fillImpl is handed the implementation object; presumably it copies the request
    // parameters onto it -- confirm against CreateFrameV3.
    cf.fillImpl(builder);
    return new JobV3(builder.execImpl());
}
Also used : KeyV3(water.api.schemas3.KeyV3) JobV3(water.api.schemas3.JobV3) CreateFrame(hex.CreateFrame)

Example 7 with CreateFrame

use of hex.CreateFrame in project h2o-3 by h2oai.

the class ChunkSummaryTest method run.

/**
 * Smoke test for chunk summaries: builds a frame with {@code CreateFrame} (seed 1234,
 * all other settings left at their defaults), derives the chunk-type and distribution
 * summary tables from it, logs them and serializes each to JSON.
 *
 * <p>The exact-JSON assertions are disabled (kept below as comments); note that the
 * expected distribution payload embeds a node address and byte sizes. With them off,
 * the test only verifies that none of the calls throws.
 *
 * <p>The frame is removed in a {@code finally} block so that a failure in any of the
 * summary/serialization steps does not leave the generated frame behind.
 */
@Test
public void run() {
    CreateFrame cf = new CreateFrame();
    cf.seed = 1234;
    Frame f = cf.execImpl().get();
    try {
        ChunkSummary cs = FrameUtils.chunkSummary(f);
        TwoDimTable chunk_summary_table = cs.toTwoDimTableChunkTypes();
        Log.info(chunk_summary_table);
        TwoDimTableV3 td = new TwoDimTableV3().fillFromImpl(chunk_summary_table);
        // json is only consumed by the disabled assertions; the call is kept so that
        // serialization of the table is still exercised.
        String json = td.toJsonString();
        //    if (H2O.CLOUD.size() == 1) {
        //      Assert.assertEquals("{\"__meta\":{\"schema_version\":3,\"schema_name\":\"TwoDimTableV3\"," +
        //          "\"schema_type\":\"TwoDimTable\"},\"name\":\"Chunk compression summary\",\"description\":\"\",\"columns\":[{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"chunk_type\",\"type\":\"string\",\"format\":\"%8s\",\"description\":\"Chunk Type\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"chunk_name\",\"type\":\"string\",\"format\":\"%s\",\"description\":\"Chunk Name\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"count\",\"type\":\"int\",\"format\":\"%10d\",\"description\":\"Count\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"count_percentage\",\"type\":\"float\",\"format\":\"%10.3f %%\",\"description\":\"Count Percentage\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"size\",\"type\":\"string\",\"format\":\"%10s\",\"description\":\"Size\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"size_percentage\",\"type\":\"float\",\"format\":\"%10.3f %%\",\"description\":\"Size Percentage\"}],\"rowcount\":4,\"data\":[[\"CXI\",\"C1\",\"C1S\",\"C8D\"],[\"Sparse Integers\",\"1-Byte Integers\",\"1-Byte Fractions\",\"64-bit Reals\"],[9,18,18,45],[10.0,20.0,20.0,50.0],[\"    1.9 KB\",\"   20.7 KB\",\"   21.0 KB\",\"  393.6 KB\"],[0.42528477,4.7406745,4.8050036,90.02904]]}"
        //          ,json);
        //    }
        TwoDimTable distribution_summary_table = cs.toTwoDimTableDistribution();
        Log.info(distribution_summary_table);
        json = new TwoDimTableV3().fillFromImpl(distribution_summary_table).toJsonString();
        //    if (H2O.CLOUD.size() == 1) {
        //      Assert.assertEquals("{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"TwoDimTableV3\"," +
        //          "\"schema_type\":\"TwoDimTable\"},\"name\":\"Frame distribution summary\",\"description\":\"\",\"columns\":[{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"\",\"type\":\"string\",\"format\":\"%s\",\"description\":\"\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"size\",\"type\":\"string\",\"format\":\"%s\",\"description\":\"Size\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"number_of_rows\",\"type\":\"float\",\"format\":\"%f\",\"description\":\"Number of Rows\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"number_of_chunks_per_column\",\"type\":\"float\",\"format\":\"%f\",\"description\":\"Number of Chunks per Column\"},{\"__meta\":{\"schema_version\":-1,\"schema_name\":\"ColumnSpecsBase\",\"schema_type\":\"Iced\"},\"name\":\"number_of_chunks\",\"type\":\"float\",\"format\":\"%f\",\"description\":\"Number of Chunks\"}],\"rowcount\":6,\"data\":[[\"172.16.2.81:54321\",\"mean\",\"min\",\"max\",\"stddev\",\"total\"],[\"  436.9 KB\",\"  436.9 KB\",\"  436.9 KB\",\"  436.9 KB\",\"      0  B\",\"  436.9 KB\"],[10000.0,10000.0,10000.0,10000.0,0.0,10000.0],[9.0,9.0,9.0,9.0,0.0,9.0],[90.0,90.0,90.0,90.0,0.0,90.0]]}", json);
        //    }
    } finally {
        // Always drop the generated frame, even when a step above throws.
        if (f != null) {
            f.remove();
        }
    }
}
Also used : CreateFrame(hex.CreateFrame) TwoDimTableV3(water.api.schemas3.TwoDimTableV3) Test(org.junit.Test)

Example 8 with CreateFrame

use of hex.CreateFrame in project h2o-3 by h2oai.

the class AggregatorTest method testAggregatorEigen.

/**
 * Trains an {@code Aggregator} with Eigen categorical encoding on a synthetic frame
 * (1000 rows x 10 columns, 60% categorical columns with 5 factor levels, no integer or
 * binary columns, no missing values, seed 1234), checks the model's consistency and
 * prints the first rows of the output frame plus the exemplar count.
 *
 * <p>All keyed objects created here (output frame, input frame, model) are removed in a
 * {@code finally} block so that a training or consistency failure does not leave them
 * behind.
 */
@Test
public void testAggregatorEigen() {
    Frame frame = null;
    Frame output = null;
    AggregatorModel agg = null;
    try {
        CreateFrame cf = new CreateFrame();
        cf.rows = 1000;
        cf.cols = 10;
        cf.categorical_fraction = 0.6;
        cf.integer_fraction = 0.0;
        cf.binary_fraction = 0.0;
        cf.real_range = 100;
        cf.integer_range = 100;
        cf.missing_fraction = 0;
        cf.factors = 5;
        cf.seed = 1234;
        frame = cf.execImpl().get();

        AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
        parms._train = frame._key;
        parms._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.Eigen;

        long start = System.currentTimeMillis();
        // 0.905  (original author's note; its meaning is not stated -- possibly a past timing)
        agg = new Aggregator(parms).trainModel().get();
        System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start) / 1000. + " seconds");
        agg.checkConsistency();

        output = agg._output._output_frame.get();
        System.out.println(output.toTwoDimTable(0, 10));
        Log.info("Number of exemplars: " + agg._exemplars.length);
    } finally {
        // Same removal order as on the original success path; each guard covers the
        // case where the failure happened before that object was created.
        if (output != null) {
            output.remove();
        }
        if (frame != null) {
            frame.remove();
        }
        if (agg != null) {
            agg.remove();
        }
    }
}
Also used : CreateFrame(hex.CreateFrame) Frame(water.fvec.Frame) Aggregator(hex.aggregator.Aggregator) AggregatorModel(hex.aggregator.AggregatorModel) Test(org.junit.Test)

Example 9 with CreateFrame

use of hex.CreateFrame in project h2o-3 by h2oai.

the class TimeSeriesTests method setup.

/**
 * One-time fixture: waits for a cloud of size 1, then builds the shared frame {@code f}
 * from a {@code CreateFrame} spec of 2 rows x 256 columns with the binary, categorical,
 * integer and missing fractions all set to zero, and seed 123.
 * The spec itself is kept in the static field {@code cf}.
 */
@BeforeClass
public static void setup() {
    stall_till_cloudsize(1);

    CreateFrame spec = new CreateFrame();
    // Shape and seed.
    spec.seed = 123;
    spec.rows = 2;
    spec.cols = 256;
    // Every fraction this test sets is zeroed.
    spec.missing_fraction = 0.0;
    spec.integer_fraction = 0.0;
    spec.categorical_fraction = 0.0;
    spec.binary_ones_fraction = 0.0;
    spec.binary_fraction = 0.0;

    // Publish the spec before executing it, so cf is set even if frame creation fails.
    cf = spec;
    f = spec.execImpl().get();
}
Also used : CreateFrame(hex.CreateFrame) BeforeClass(org.junit.BeforeClass)

Example 10 with CreateFrame

use of hex.CreateFrame in project h2o-3 by h2oai.

the class GroupingBench method runGroupingBench.

/**
 * Manual (ignored) benchmark: fills a 1e9-row vector via {@code MySample}, times three
 * repetitions of {@code MyCountRange} over it and prints the per-chunk counts and the
 * elapsed time of each repetition. A small {@code CreateFrame} frame is also generated
 * and printed.
 *
 * <p>Changes relative to the earlier version of this method:
 * <ul>
 *   <li>the "first 5 chunks" print is bounded by the number of count rows actually
 *       returned, so it no longer throws when there are fewer than five;</li>
 *   <li>the vector and the frame are released in a {@code finally} block, so they are
 *       cleaned up even if the benchmark throws part-way through.</li>
 * </ul>
 */
@Ignore
@Test
public void runGroupingBench() {
    // Simplified version of tests in runit_quantile_1_golden.R. There we test probs=seq(0,1,by=0.01)
    //Vec vec = Vec.makeCon(1.1, 1000000000);
    //Vec vec = Vec.makeRepSeq(10,10);
    Vec vec = Vec.makeZero((long) 1e9);
    Frame frame = null;
    try {
        //System.out.println("Chunks: " + vec.nChunks());
        //System.out.println("Vec length: " + vec.length());
        //System.out.println("Populating vector... ");
        //new MySeq((int)100).doAll(vec);
        //new MySample((int)10).doAll(vec);
        new MySample((int) 10).doAll(vec);
        // to cache rollups,  so timing below excludes it
        vec.max();
        // NOTE(review): this header is printed here, but the 30 values themselves are
        // only printed further down, after the chunk list and the generated frame.
        System.out.println("\nFirst 30 of vec ...");
        System.out.println("There are " + vec.nChunks() + " chunks");
        for (int i = 0; i < vec.nChunks(); i++) {
            System.out.println("Chunk" + i + "is on" + vec.chunkKey(i).home_node());
        }
        CreateFrame cf = new CreateFrame();
        cf.rows = 100;
        cf.cols = 10;
        cf.categorical_fraction = 0.1;
        cf.integer_fraction = 1 - cf.categorical_fraction;
        cf.binary_fraction = 0;
        cf.factors = 4;
        cf.response_factors = 2;
        cf.positive_response = false;
        cf.has_response = true;
        cf.seed = 1234;
        frame = cf.execImpl().get();
        System.out.print(frame.toString(0, 14));
        for (int i = 0; i < 30; i++) {
            System.out.print((int) vec.at(i) + " ");
        }
        System.out.println("\n");
        // Vec vec = vec(5 , 8 ,  9 , 12 , 13 , 16 , 18 , 23 , 27 , 28 , 30 , 31 , 33 , 34 , 43,  45,  48, 161);
        // makeSeq;
        // Take out memory alloc before the loop to avoid GC costs, before vtune profiling
        // Now broken up into arrays of same shape as vec.chunks. Really cannot have one array of 1e9 items in Java.
        // nanos = System.nanoTime();
        long heapsize = Runtime.getRuntime().totalMemory();
        System.out.println("heapsize is::" + heapsize);
        //long o[] = new long[(int)vec.length()];
        // [(int)vec.length()];
        // o is only consumed by the disabled WriteOrder step below; the allocation is
        // kept so that step can be re-enabled without further changes.
        int[][] o = new int[vec.nChunks()][];
        for (int c = 0; c < o.length; c++) {
            o[c] = new int[vec.chunkForChunkIdx(c)._len];
        }
        for (int timeRep = 0; timeRep < 3; timeRep++) {
            // TO DO: caliper java project
            // TO DO:  search for utils.Timer,  prettyPrint
            long nanos = System.nanoTime();
            // NOTE(review): arguments are passed as (max, min, nChunks) while the disabled
            // WriteOrder call below passes (min, max) -- confirm against MyCountRange's constructor.
            long[][] ans2 = new MyCountRange((long) vec.max(), (long) vec.min(), vec.nChunks()).doAll(vec)._counts;
            long nanos1 = System.nanoTime() - nanos;
            System.out.println("Counts per chunk (first 5 chunks) ...");
            // Never index past the rows that were actually returned.
            int shown = Math.min(5, ans2.length);
            for (int c = 0; c < shown; c++) {
                System.out.println(Arrays.toString(ans2[c]));
            }
            /*
                nanos = System.nanoTime();
                // cumulate across chunks
                int nBuckets = (int)((long) vec.max() - (long) vec.min() + 1);
                long rollSum = 0;
                for (int b = 0; b < nBuckets; b++) {
                    for (int c = 0; c < vec.nChunks(); c++) {
                        long tmp = ans2[c][b];
                        ans2[c][b] = rollSum;
                        rollSum += tmp;
                    }
                }
                long nanos2 = System.nanoTime() - nanos;
                //System.out.println("\nCounts after cumulate ...");
                //for (int c = 0; c < vec.nChunks(); c++) System.out.println(Arrays.toString(ans2[c]));

                nanos = System.nanoTime();
                new WriteOrder(ans2, o, (long) vec.min(), (long) vec.max()).doAll(vec);
                long nanos3 = System.nanoTime() - nanos;

                //System.out.println("\nCounts after WriteOrder ...");
                //for (int c = 0; c < vec.nChunks(); c++) System.out.println(Arrays.toString(ans2[c]));

                System.out.println("\nFirst 10 of order ...");
                //for (int i=0; i<10; i++) System.out.print(o[i] + " ");
                for (int i=0; i<10; i++) System.out.print(o[0][i] + " ");

                System.out.println("\nLast 10 of order ...");
                //for (int i=9; i>=0; i--) System.out.print(o[(int)(vec.length()-i-1)] + " "); System.out.print("\n");
                int c = vec.nChunks()-1;
                long cstart = vec._espc[c];
                for (int i=9; i>=0; i--) System.out.print(o[c][(int)(vec.length()-i-1-cstart)] + " "); System.out.print("\n");

                System.out.println("\nFirst 40 of vec ...");
                for (int i=0; i<40; i++) System.out.print((int)vec.at(i) + " ");
                System.out.println("\nLast 40 of vec ...");
                for (int i=39; i>=0; i--) System.out.print((int)vec.at((int)vec.length()-i-1) + " ");  System.out.print("\n");
                */
            System.out.println("\nInitial count: " + nanos1 / 1e9);
            //System.out.println("Cumulate across chunks: " + nanos2 / 1e9);
            //System.out.println("Write to order[]: " + nanos3 / 1e9);
            //System.out.println("Total time: " + (nanos1+nanos2+nanos3) / 1e9);
            System.out.println("");
        }
        // Next: input int, then large groups, small groups
    } finally {
        // Release the vector first, then the frame; the nested finally makes sure the
        // frame is still deleted if removing the vector throws.
        try {
            vec.remove();
        } finally {
            if (frame != null) {
                frame.delete();
            }
        }
    }
}
Also used : CreateFrame(hex.CreateFrame) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

CreateFrame (hex.CreateFrame): 10, Test (org.junit.Test): 6, Frame (water.fvec.Frame): 5, Aggregator (hex.aggregator.Aggregator): 4, AggregatorModel (hex.aggregator.AggregatorModel): 4, BeforeClass (org.junit.BeforeClass): 1, Ignore (org.junit.Ignore): 1, JobV3 (water.api.schemas3.JobV3): 1, KeyV3 (water.api.schemas3.KeyV3): 1, TwoDimTableV3 (water.api.schemas3.TwoDimTableV3): 1