Search in sources :

Example 1 with ParseSetup

use of water.parser.ParseSetup in project h2o-3 by h2oai.

the class ParseSetupV3 method fillImpl.

/**
 * Fills the given {@link ParseSetup} from this schema and resolves the textual
 * {@code parse_type} field to a {@link ParserInfo}.
 *
 * <p>The parser info starts as {@code GUESS_INFO}; when {@code parse_type} is set, the matching
 * {@link ParserProvider} is looked up by name through {@code ParserService.INSTANCE}.
 *
 * <p>NOTE(review): this excerpt is incomplete. The closing braces of the outer {@code if} and of
 * the method are missing, and {@code pi} is never used after it is computed. Presumably the
 * original applies it to {@code parseSetup} before returning; confirm against the upstream
 * source.
 *
 * @param impl the setup object handed to the two-argument {@code fillImpl} overload
 * @return the {@code ParseSetup} produced by the two-argument {@code fillImpl} overload
 * @throws H2OIllegalValueException if {@code parse_type} is set but no provider is registered
 *         under that name
 */
public ParseSetup fillImpl(ParseSetup impl) {
    // Delegate to the two-argument overload; "parse_type" is presumably the list of field names
    // to leave out of the generic copy, since it is handled by hand below (TODO confirm).
    ParseSetup parseSetup = fillImpl(impl, new String[] { "parse_type" });
    // Transform the field parse_type
    ParserInfo pi = GUESS_INFO;
    if (this.parse_type != null) {
        ParserProvider pp = ParserService.INSTANCE.getByName(this.parse_type);
        if (pp != null) {
            // NOTE(review): the right-hand side of this assignment is missing from the excerpt;
            // it is presumably derived from the provider `pp`. Verify against upstream.
            pi =;
        } else
            // No provider is registered under the requested name: reject the value.
            throw new H2OIllegalValueException("Cannot find right parser for specified parser type!", this.parse_type);
    return parseSetup;
Also used : ParseSetup(water.parser.ParseSetup) ParserInfo(water.parser.ParserInfo) ParserProvider(water.parser.ParserProvider) H2OIllegalValueException(water.exceptions.H2OIllegalValueException)

Example 2 with ParseSetup

use of water.parser.ParseSetup in project h2o-3 by h2oai.

the class RapidsTest method testChicago.

public void testChicago() {
    String oldtz = Rapids.exec("(getTimeZone)").getStr();
    Session ses = new Session();
    try {
        parse_test_file(Key.make("weather.hex"), "smalldata/chicago/chicagoAllWeather.csv");
        parse_test_file(Key.make("crimes.hex"), "smalldata/chicago/");
        String fname = "smalldata/chicago/chicagoCensus.csv";
        File f = FileUtils.locateFile(fname);
        assert f != null && f.exists() : " file not found: " + fname;
        NFSFileVec nfs = NFSFileVec.make(f);
        ParseSetup ps = ParseSetup.guessSetup(new Key[] { nfs._key }, false, 1);
        ps.getColumnTypes()[1] = Vec.T_CAT;
        ParseDataset.parse(Key.make("census.hex"), new Key[] { nfs._key }, true, ps);
        exec_str("(assign census.hex (colnames= census.hex\t[0 1 2 3 4 5 6 7 8] \n" + "['Community.Area.Number' 'COMMUNITY.AREA.NAME' \"PERCENT.OF.HOUSING.CROWDED\" \r\n" + " \"PERCENT.HOUSEHOLDS.BELOW.POVERTY\" \"PERCENT.AGED.16..UNEMPLOYED\" " + " \"PERCENT.AGED.25..WITHOUT.HIGH.SCHOOL.DIPLOMA\" \"PERCENT.AGED.UNDER.18.OR.OVER.64\" " + " \"PER.CAPITA.INCOME.\" \"HARDSHIP.INDEX\"]))", ses);
        exec_str("(assign crimes.hex (colnames= crimes.hex [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21] [\"ID\" \"Case.Number\" \"Date\" \"Block\" \"IUCR\" \"Primary.Type\" \"Description\" \"Location.Description\" \"Arrest\" \"Domestic\" \"Beat\" \"District\" \"Ward\" \"Community.Area\" \"FBI.Code\" \"X.Coordinate\" \"Y.Coordinate\" \"Year\" \"Updated.On\" \"Latitude\" \"Longitude\" \"Location\"]))", ses);
        exec_str("(setTimeZone \"Etc/UTC\")", ses);
        exec_str("(assign crimes.hex (append crimes.hex (tmp= unary_op_6 (day (tmp= nary_op_5 (cols crimes.hex [2])))) \"Day\"))", ses);
        exec_str("(assign crimes.hex (append crimes.hex (tmp= binary_op_31 (+ (tmp= unary_op_7 (month nary_op_5)) 1)) \"Month\"))", ses);
        exec_str("(rm nary_op_30)", ses);
        exec_str("(assign crimes.hex (append crimes.hex (tmp= binary_op_32 (+ (tmp= binary_op_9 (- (tmp= unary_op_8 (year nary_op_5)) 1900)) 1900)) \"Year\"))", ses);
        exec_str("(assign crimes.hex (append crimes.hex (tmp= unary_op_10 (week nary_op_5)) \"WeekNum\"))", ses);
        exec_str("(rm binary_op_32)", ses);
        exec_str("(rm binary_op_31)", ses);
        exec_str("(rm unary_op_8)", ses);
        exec_str("(assign crimes.hex (append crimes.hex (tmp= unary_op_11 (dayOfWeek nary_op_5)) \"WeekDay\"))", ses);
        exec_str("(rm 'nfs:\\\\C:\\\\Users\\\\cliffc\\\\Desktop\\\\h2o-3\\\\smalldata\\\\chicago\\\\')", ses);
        exec_str("(assign crimes.hex (append crimes.hex (tmp= unary_op_12 (hour nary_op_5)) \"HourOfDay\"))", ses);
        exec_str("(assign crimes.hex (append crimes.hex (tmp= nary_op_16 (ifelse (tmp= binary_op_15 (| (tmp= binary_op_13 (== unary_op_11 \"Sun\")) (tmp= binary_op_14 (== unary_op_11 \"Sat\")))) 1 0)) \"Weekend\"))", ses);
        // Season is incorrectly assigned in the original chicago demo; picks up the Weekend flag
        exec_str("(assign crimes.hex (append crimes.hex nary_op_16 \"Season\"))", ses);
        // Standard "head of 10 rows" pattern for printing
        exec_str("(tmp= subset_33 (rows crimes.hex [0:10]))", ses);
        exec_str("(rm subset_33)", ses);
        exec_str("(rm subset_33)", ses);
        exec_str("(rm unary_op_29)", ses);
        exec_str("(rm nary_op_28)", ses);
        exec_str("(rm nary_op_27)", ses);
        exec_str("(rm nary_op_26)", ses);
        exec_str("(rm binary_op_25)", ses);
        exec_str("(rm binary_op_24)", ses);
        exec_str("(rm binary_op_23)", ses);
        exec_str("(rm binary_op_22)", ses);
        exec_str("(rm binary_op_21)", ses);
        exec_str("(rm binary_op_20)", ses);
        exec_str("(rm binary_op_19)", ses);
        exec_str("(rm binary_op_18)", ses);
        exec_str("(rm binary_op_17)", ses);
        exec_str("(rm nary_op_16)", ses);
        exec_str("(rm binary_op_15)", ses);
        exec_str("(rm binary_op_14)", ses);
        exec_str("(rm binary_op_13)", ses);
        exec_str("(rm unary_op_12)", ses);
        exec_str("(rm unary_op_11)", ses);
        exec_str("(rm unary_op_10)", ses);
        exec_str("(rm binary_op_9)", ses);
        exec_str("(rm unary_op_8)", ses);
        exec_str("(rm unary_op_7)", ses);
        exec_str("(rm unary_op_6)", ses);
        exec_str("(rm nary_op_5)", ses);
        // Standard "head of 10 rows" pattern for printing
        exec_str("(tmp= subset_34 (rows crimes.hex [0:10]))", ses);
        exec_str("(rm subset_34)", ses);
        exec_str("(assign census.hex (colnames= census.hex [0 1 2 3 4 5 6 7 8] [\"Community.Area\" \"COMMUNITY.AREA.NAME\" \"PERCENT.OF.HOUSING.CROWDED\" \"PERCENT.HOUSEHOLDS.BELOW.POVERTY\" \"PERCENT.AGED.16..UNEMPLOYED\" \"PERCENT.AGED.25..WITHOUT.HIGH.SCHOOL.DIPLOMA\" \"PERCENT.AGED.UNDER.18.OR.OVER.64\" \"PER.CAPITA.INCOME.\" \"HARDSHIP.INDEX\"]))", ses);
        exec_str("(rm subset_34)", ses);
        exec_str("(tmp= subset_35 (cols  crimes.hex [-3]))", ses);
        exec_str("(tmp= subset_36 (cols weather.hex [-1]))", ses);
        exec_str("(tmp= subset_36_2 (colnames= subset_36 [0 1 2 3 4 5] [\"Month\" \"Day\" \"Year\" \"maxTemp\" \"meanTemp\" \"minTemp\"]))", ses);
        exec_str("(rm crimes.hex)", ses);
        exec_str("(rm weather.hex)", ses);
        // nary_op_37 = merge( X Y ); Vecs in X & nary_op_37 shared
        exec_str("(tmp= nary_op_37 (merge subset_35 census.hex TRUE FALSE [] [] \"auto\"))", ses);
        // nary_op_38 = merge( nary_op_37 subset_36_2); Vecs in nary_op_38 and nary_pop_37 and X shared
        exec_str("(tmp= subset_41 (rows (tmp= nary_op_38 (merge nary_op_37 subset_36_2 TRUE FALSE [] [] \"auto\")) (tmp= binary_op_40 (<= (tmp= nary_op_39 (h2o.runif nary_op_38 30792152736.5179)) 0.8))))", ses);
        // Standard "head of 10 rows" pattern for printing
        exec_str("(tmp= subset_44 (rows subset_41 [0:10]))", ses);
        exec_str("(rm subset_44)", ses);
        exec_str("(rm subset_44)", ses);
        exec_str("(rm binary_op_40)", ses);
        exec_str("(rm nary_op_37)", ses);
        exec_str("(tmp= subset_43 (rows nary_op_38 (tmp= binary_op_42 (> nary_op_39 0.8))))", ses);
        // Chicago demo continues on past, but this is all I've captured for now
    } catch (Throwable ex) {
        throw ses.endQuietly(ex);
    } finally {
        // Restore time zone (which is global, and will affect following tests)
        Rapids.exec("(setTimeZone \"" + oldtz + "\")");
        for (String s : new String[] { "weather.hex", "crimes.hex", "census.hex", "nary_op_5", "unary_op_6", "unary_op_7", "unary_op_8", "binary_op_9", "unary_op_10", "unary_op_11", "unary_op_12", "binary_op_13", "binary_op_14", "binary_op_15", "nary_op_16", "binary_op_17", "binary_op_18", "binary_op_19", "binary_op_20", "binary_op_21", "binary_op_22", "binary_op_23", "binary_op_24", "binary_op_25", "nary_op_26", "nary_op_27", "nary_op_28", "unary_op_29", "binary_op_30", "binary_op_31", "binary_op_32", "subset_33", "subset_34", "subset_35", "subset_36", "subset_36_2", "nary_op_37", "nary_op_38", "nary_op_39", "binary_op_40", "subset_41", "binary_op_42", "subset_43", "subset_44" }) Keyed.remove(Key.make(s));
Also used : ParseSetup(water.parser.ParseSetup) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Test(org.junit.Test)

Example 3 with ParseSetup

use of water.parser.ParseSetup in project h2o-3 by h2oai.

the class ParseHandler method parseSVMLight.

// called through reflection by RequestServer
public JobV3 parseSVMLight(int version, ParseSVMLightV3 parse) {
    Key[] fkeys = new Key[parse.source_frames.length];
    for (int i = 0; i < fkeys.length; ++i) fkeys[i] = parse.source_frames[i].key();
    Key<Frame> destKey = parse.destination_frame == null ? null : parse.destination_frame.key();
    if (destKey == null)
        destKey = Key.make(ParseSetup.createHexName(parse.source_frames[0].toString()));
    ParseSetup setup = ParseSetup.guessSetup(fkeys, ParseSetup.makeSVMLightSetup());
    return new JobV3().fillFromImpl(ParseDataset.forkParseSVMLight(destKey, fkeys, setup));
Also used : Frame(water.fvec.Frame) ParseSetup(water.parser.ParseSetup) JobV3(water.api.schemas3.JobV3) Key(water.Key)

Example 4 with ParseSetup

use of water.parser.ParseSetup in project h2o-3 by h2oai.

the class TestUtil method parse_test_file.

/**
 * Parses a single test file with an optional NA string.
 *
 * <p>The parse setup is guessed from the file, starting from a template that uses
 * {@code DefaultParserProviders.GUESS_INFO}, a comma separator and the caller's
 * {@code check_header} value.
 *
 * <p>NOTE(review): this excerpt is incomplete. The closing braces of the {@code for} loop, the
 * {@code if (na_string != null)} block and the method are missing, as are the statement guarded
 * by {@code if (column_types != null)} and any statement that stores {@code na_strings} into
 * {@code p}. Confirm the missing parts against the upstream source.
 *
 * @param fname name of the file, passed to {@code makeNfsFileVec}
 * @param na_string string to treat as NA in every column, or {@code null} for none
 * @param check_header header-check value forwarded to the {@code ParseSetup} constructor
 * @param column_types optional column types; only tested for {@code null} in this excerpt
 * @return the frame returned by {@code ParseDataset.parse}
 */
protected Frame parse_test_file(String fname, String na_string, int check_header, byte[] column_types) {
    NFSFileVec nfs = makeNfsFileVec(fname);
    Key[] res = { nfs._key };
    // create new parseSetup in order to store our na_string
    ParseSetup p = ParseSetup.guessSetup(res, new ParseSetup(DefaultParserProviders.GUESS_INFO, (byte) ',', true, check_header, 0, null, null, null, null, null));
    // add the na_strings into p.
    if (na_string != null) {
        int column_number = p.getColumnTypes().length;
        // Index of the last slot in each per-column row (row length is na_string.length()).
        // NOTE(review): an empty na_string makes this -1, so the store below would be out of range.
        int na_length = na_string.length() - 1;
        String[][] na_strings = new String[column_number][na_length + 1];
        for (int index = 0; index < column_number; index++) {
            // Only the last slot of each row is filled; the other slots stay null.
            na_strings[index][na_length] = na_string;
    // NOTE(review): the statement guarded by this condition is missing from the excerpt
    // (presumably it applies column_types to p; verify against upstream).
    if (column_types != null)
    return ParseDataset.parse(Key.make(), res, true, p);
Also used : ParseSetup(water.parser.ParseSetup) BufferedString(water.parser.BufferedString)

Example 5 with ParseSetup

use of water.parser.ParseSetup in project h2o-3 by h2oai.

the class TestUtil method parse_test_folder.

  /**
   * Parse a folder with csv files when a single na_string is specified.
   * @param fname name of folder
   * @param na_string string for NA in a column
   * @return the frame produced by parsing the files in the folder
   */
protected static Frame parse_test_folder(String fname, String na_string, int check_header, byte[] column_types) {
    // NOTE(review): this excerpt is incomplete. Several statements and all closing braces after
    // the inner `for` loop are missing; the notes below mark each gap. Confirm against upstream.
    // Locate the folder and list its entries through the contentsOf helper.
    File folder = FileUtils.locateFile(fname);
    File[] files = contentsOf(fname, folder);
    ArrayList<Key> keys = new ArrayList<>();
    // NOTE(review): the statement executed for each regular file is missing from the excerpt
    // (presumably it adds a key for the file to `keys`; verify against upstream).
    for (File f : files) if (f.isFile())
    Key[] res = new Key[keys.size()];
    // generated the necessary key here
    // NOTE(review): no visible statement copies `keys` into `res`.
    // create new parseSetup in order to store our na_string
    ParseSetup p = ParseSetup.guessSetup(res, new ParseSetup(DefaultParserProviders.GUESS_INFO, (byte) ',', true, check_header, 0, null, null, null, null, null));
    // add the na_strings into p.
    if (na_string != null) {
        int column_number = p.getColumnTypes().length;
        // Index of the last slot in each per-column row (row length is na_string.length()).
        // NOTE(review): an empty na_string makes this -1, so the store below would be out of range.
        int na_length = na_string.length() - 1;
        String[][] na_strings = new String[column_number][na_length + 1];
        for (int index = 0; index < column_number; index++) {
            // Only the last slot of each row is filled; the other slots stay null.
            na_strings[index][na_length] = na_string;
    // NOTE(review): the statement guarded by this condition is missing from the excerpt
    // (presumably it applies column_types to p; verify against upstream).
    if (column_types != null)
    return ParseDataset.parse(Key.make(), res, true, p);
Also used : ParseSetup(water.parser.ParseSetup) ArrayList(java.util.ArrayList) BufferedString(water.parser.BufferedString)


ParseSetup (water.parser.ParseSetup)7 Key (water.Key)3 ArrayList (java.util.ArrayList)2 JobV3 (water.api.schemas3.JobV3)2 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)2 Frame (water.fvec.Frame)2 BufferedString (water.parser.BufferedString)2 ParserInfo (water.parser.ParserInfo)2 File (java.io.File)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 Test (org.junit.Test)1 H2OIllegalValueException (water.exceptions.H2OIllegalValueException)1 NFSFileVec (water.fvec.NFSFileVec)1 ParseDataset (water.parser.ParseDataset)1 ParseWriter (water.parser.ParseWriter)1 ParserProvider (water.parser.ParserProvider)1 DistributedException (water.util.DistributedException)1