Search in sources :

Example 1 with DistributedException

use of water.util.DistributedException in project h2o-3 by h2oai.

From the class ParseSetupHandler, method guessSetup.

/**
 * Guesses a parse setup for the requested source frames and writes the result back into the
 * request schema.
 *
 * <p>The preview data returned in {@code p.data} is either the complete preview, or, when a
 * {@code column_name_filter}, {@code column_offset} or {@code column_count} is supplied, only the
 * columns whose names fully match the filter regex, paginated by offset and count. A negative
 * {@code column_offset} is treated as 0; a non-positive {@code column_count} means "no limit".
 *
 * @param version not used by this method
 * @param p request schema; it is modified in place and returned
 * @return the same {@code p} instance, populated with the guessed setup
 * @throws H2OIllegalArgumentException if no source frames are given, or if the setup guess fails
 *     with a {@code ParseDataset.H2OParseException} (directly or wrapped in a
 *     {@code DistributedException})
 * @throws IllegalArgumentException if one of the source keys is not present in the DKV
 */
public ParseSetupV3 guessSetup(int version, ParseSetupV3 p) {
    // An empty array is rejected too: p.source_frames[0] is dereferenced below to build the destination name.
    if (p.source_frames == null || p.source_frames.length == 0) {
        throw new H2OIllegalArgumentException("No file names given for parsing.");
    }
    Key[] fkeys = new Key[p.source_frames.length];
    for (int i = 0; i < p.source_frames.length; i++) {
        fkeys[i] = p.source_frames[i].key();
        if (DKV.get(fkeys[i]) == null) {
            throw new IllegalArgumentException("Key not loaded: " + p.source_frames[i]);
        }
    }
    // corrects for json putting in empty strings in the place of empty sub-arrays
    if (p.na_strings != null) {
        for (int i = 0; i < p.na_strings.length; i++) {
            if (p.na_strings[i] != null && p.na_strings[i].length == 0) {
                p.na_strings[i] = null;
            }
        }
    }
    ParseSetup ps;
    try {
        ps = ParseSetup.guessSetup(fkeys, new ParseSetup(p));
    } catch (Throwable ex) {
        // Unwrap a DistributedException so that a parse error raised remotely is recognised below.
        Throwable ex2 = ex;
        if (ex instanceof DistributedException) {
            ex2 = ex.getCause();
        }
        if (ex2 instanceof ParseDataset.H2OParseException) {
            // NOTE(review): only the message is propagated, the original exception is not chained as cause.
            throw new H2OIllegalArgumentException(ex2.getMessage());
        }
        throw ex;
    }
    if (ps._errs != null && ps._errs.length > 0) {
        p.warnings = new String[ps._errs.length];
        for (int i = 0; i < ps._errs.length; ++i) {
            p.warnings[i] = ps._errs[i].toString();
        }
    }
    // TODO: ParseSetup throws away the srcs list. . .
    boolean has_name_filter = p.column_name_filter != null && !"".equals(p.column_name_filter);
    if (!has_name_filter && (0 == p.column_offset) && (0 == p.column_count)) {
        // return the entire data preview
        PojoUtils.copyProperties(p, ps, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES, new String[] { "destination_key", "source_keys", "column_types", "parse_type" });
        p.total_filtered_column_count = p.number_columns;
    } else {
        // have to manually copy the desired parts of p.data to apply either column_name_filter or column pagination or both
        PojoUtils.copyProperties(p, ps, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES, new String[] { "destination_key", "source_keys", "column_types", "data", "parse_type" });
        String[] all_col_names = ps.getColumnNames();
        String[][] data = ps.getData();
        ArrayList<Integer> keep_indexes = new ArrayList<>();
        if (has_name_filter) {
            // filter and then paginate columns; a column is kept only if its whole name matches the regex
            Pattern pattern = Pattern.compile(p.column_name_filter);
            Matcher m = pattern.matcher("dummy");
            for (int column = 0; column < all_col_names.length; column++) {
                m.reset(all_col_names[column]);
                if (m.matches()) {
                    keep_indexes.add(column);
                }
            }
        } else {
            // note: we do a little extra work below by treating this like the filter case, but the code is simpler
            for (int column = 0; column < all_col_names.length; column++) {
                keep_indexes.add(column);
            }
        }
        // A negative offset would index keep_indexes out of bounds; treat it as "start at the first column".
        int column_offset = Math.max(0, p.column_offset);
        int width_to_return = Math.max(0, keep_indexes.size() - column_offset);
        if (p.column_count > 0) {
            width_to_return = Math.min(width_to_return, p.column_count);
        }
        if (data == null) {
            // No preview rows available: nothing to filter.
            p.data = null;
        } else {
            String[][] filtered_data = new String[data.length][width_to_return];
            for (int row = 0; row < data.length; row++) {
                int output_column = 0;
                for (int input_column_index = column_offset; input_column_index < column_offset + width_to_return; input_column_index++) {
                    // indirect through keep_indexes
                    filtered_data[row][output_column++] = data[row][keep_indexes.get(input_column_index)];
                }
            }
            p.data = filtered_data;
        }
        p.total_filtered_column_count = keep_indexes.size();
    }
    p.destination_frame = ParseSetup.createHexName(p.source_frames[0].toString());
    // Drop the first preview row when it merely repeats the header. The length check protects
    // against an empty preview, where p.data[0] does not exist.
    // NOTE(review): in the filtered/paginated path p.data[0] holds only the selected columns, so it
    // presumably never equals the full p.column_names - confirm whether the header row should be
    // stripped in that case as well.
    if (p.check_header == ParseSetup.HAS_HEADER && p.data != null && p.data.length > 0 && Arrays.equals(p.column_names, p.data[0])) {
        p.data = Arrays.copyOfRange(p.data, 1, p.data.length);
    }
    // Fill in data type names for each column.
    p.column_types = ps.getColumnTypeStrings();
    p.parse_type = ps.getParseType() != null ? ps.getParseType().name() : GUESS_INFO.name();
    return p;
}
Also used : Pattern(java.util.regex.Pattern) ParseSetup(water.parser.ParseSetup) DistributedException(water.util.DistributedException) Matcher(java.util.regex.Matcher) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) ArrayList(java.util.ArrayList) ParseDataset(water.parser.ParseDataset) Key(water.Key) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Aggregations

ArrayList (java.util.ArrayList)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 Key (water.Key)1 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)1 ParseDataset (water.parser.ParseDataset)1 ParseSetup (water.parser.ParseSetup)1 DistributedException (water.util.DistributedException)1