
Example 51 with Key

Use of water.Key in project h2o-3 by h2oai.

The class ParseSetupHandler, method guessSetup.

public ParseSetupV3 guessSetup(int version, ParseSetupV3 p) {
    if (p.source_frames == null)
        throw new H2OIllegalArgumentException("No file names given for parsing.");
    Key[] fkeys = new Key[p.source_frames.length];
    for (int i = 0; i < p.source_frames.length; i++) {
        fkeys[i] = p.source_frames[i].key();
        if (DKV.get(fkeys[i]) == null)
            throw new IllegalArgumentException("Key not loaded: " + p.source_frames[i]);
    }
    // Correct for JSON putting empty strings in place of empty sub-arrays
    if (p.na_strings != null)
        for (int i = 0; i < p.na_strings.length; i++) if (p.na_strings[i] != null && p.na_strings[i].length == 0)
            p.na_strings[i] = null;
    ParseSetup ps;
    try {
        ps = ParseSetup.guessSetup(fkeys, new ParseSetup(p));
    } catch (Throwable ex) {
        Throwable ex2 = ex;
        if (ex instanceof DistributedException)
            ex2 = ex.getCause();
        if (ex2 instanceof ParseDataset.H2OParseException)
            throw new H2OIllegalArgumentException(ex2.getMessage());
        throw ex;
    }
    if (ps._errs != null && ps._errs.length > 0) {
        p.warnings = new String[ps._errs.length];
        for (int i = 0; i < ps._errs.length; ++i) p.warnings[i] = ps._errs[i].toString();
    }
    // TODO: ParseSetup throws away the srcs list. . .
    if ((null == p.column_name_filter || "".equals(p.column_name_filter)) && (0 == p.column_offset) && (0 == p.column_count)) {
        // return the entire data preview
        PojoUtils.copyProperties(p, ps, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES, new String[] { "destination_key", "source_keys", "column_types", "parse_type" });
        p.total_filtered_column_count = p.number_columns;
    } else {
        // have to manually copy the desired parts of p.data to apply either column_name_filter or column pagination or both
        PojoUtils.copyProperties(p, ps, PojoUtils.FieldNaming.ORIGIN_HAS_UNDERSCORES, new String[] { "destination_key", "source_keys", "column_types", "data", "parse_type" });
        String[] all_col_names = ps.getColumnNames();
        String[][] data = ps.getData();
        ArrayList<Integer> keep_indexes = new ArrayList<>();
        if (null != p.column_name_filter && !"".equals(p.column_name_filter)) {
            // filter and then paginate columns
            Pattern pattern = Pattern.compile(p.column_name_filter);
            Matcher m = pattern.matcher("dummy");
            for (int column = 0; column < all_col_names.length; column++) {
                m.reset(all_col_names[column]);
                if (m.matches())
                    keep_indexes.add(column);
            }
        } else {
            // note: we do a little extra work below by treating this like the filter case, but the code is simpler
            for (int column = 0; column < all_col_names.length; column++) {
                keep_indexes.add(column);
            }
        }
        int width_to_return = Math.max(0, keep_indexes.size() - p.column_offset);
        if (p.column_count > 0)
            width_to_return = Math.min(width_to_return, p.column_count);
        String[][] filtered_data = new String[data.length][width_to_return];
        for (int row = 0; row < data.length; row++) {
            int output_column = 0;
            for (int input_column_index = p.column_offset; input_column_index < p.column_offset + width_to_return; input_column_index++) {
                // indirect through keep_indexes
                filtered_data[row][output_column++] = data[row][keep_indexes.get(input_column_index)];
            }
        }
        p.data = filtered_data;
        p.total_filtered_column_count = keep_indexes.size();
    }
    p.destination_frame = ParseSetup.createHexName(p.source_frames[0].toString());
    if (p.check_header == ParseSetup.HAS_HEADER && p.data != null && Arrays.equals(p.column_names, p.data[0]))
        p.data = Arrays.copyOfRange(p.data, 1, p.data.length);
    // Fill in data type names for each column.
    p.column_types = ps.getColumnTypeStrings();
    p.parse_type = ps.getParseType() != null ? ps.getParseType().name() : GUESS_INFO.name();
    return p;
}
Also used : Pattern(java.util.regex.Pattern) ParseSetup(water.parser.ParseSetup) DistributedException(water.util.DistributedException) Matcher(java.util.regex.Matcher) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) ArrayList(java.util.ArrayList) ParseDataset(water.parser.ParseDataset) Key(water.Key)
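
The first loop above is the part that actually exercises water.Key: every source frame is resolved to a Key and checked against the DKV before any parsing is configured. A minimal standalone sketch of that pattern, assuming the frame names arrive as plain strings (the helper class, method name, and signature are illustrative):

import water.DKV;
import water.Key;
import water.exceptions.H2OIllegalArgumentException;

public class KeyResolutionSketch {
    // Resolve frame names to Keys and verify each one is present in the DKV,
    // mirroring the validation loop in guessSetup above.
    static Key[] resolveFrameKeys(String[] frameNames) {
        if (frameNames == null || frameNames.length == 0)
            throw new H2OIllegalArgumentException("No file names given for parsing.");
        Key[] fkeys = new Key[frameNames.length];
        for (int i = 0; i < frameNames.length; i++) {
            fkeys[i] = Key.make(frameNames[i]);   // named key for the loaded frame
            if (DKV.get(fkeys[i]) == null)        // nothing stored under that key
                throw new IllegalArgumentException("Key not loaded: " + frameNames[i]);
        }
        return fkeys;
    }
}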

Example 52 with Key

Use of water.Key in project h2o-3 by h2oai.

The class GridSchemaV99, method fillFromImpl.

@Override
public GridSchemaV99 fillFromImpl(Grid grid) {
    Key<Model>[] gridModelKeys = grid.getModelKeys();
    // Return only keys that reference existing objects in the DKV.
    // There is still an implicit race here: we send keys to the client,
    // but the referenced models can be deleted in the meantime, so the
    // client has to handle that case by calling getModel and checking
    // for a null model.
    // Pre-allocate the filtered list.
    List<Key<Model>> modelKeys = new ArrayList<>(gridModelKeys.length);
    for (Key k : gridModelKeys) {
        if (k != null && DKV.get(k) != null) {
            modelKeys.add(k);
        }
    }
    // Default sort order -- TODO: Outsource
    if (sort_by == null && modelKeys.size() > 0 && modelKeys.get(0) != null) {
        Model m = DKV.getGet(modelKeys.get(0));
        if (m != null && m.isSupervised()) {
            if (m._output.nclasses() > 1) {
                sort_by = "logloss";
                decreasing = false;
            } else {
                sort_by = "residual_deviance";
                decreasing = false;
            }
        }
    }
    // Validate sort_by; if it is not an allowed metric, report all possible metrics
    if (modelKeys.size() > 0 && sort_by != null) {
        Set<String> possibleMetrics = ModelMetrics.getAllowedMetrics(modelKeys.get(0));
        if (!possibleMetrics.contains(sort_by.toLowerCase())) {
            throw new H2OIllegalArgumentException("Invalid argument for sort_by specified. Must be one of: " + Arrays.toString(possibleMetrics.toArray(new String[0])));
        }
    }
    // Are we sorting by model metrics?
    if (null != sort_by && !sort_by.isEmpty()) {
        // sort the model keys
        modelKeys = ModelMetrics.sortModelsByMetric(sort_by, decreasing, modelKeys);
        // fill the metrics arrays
        training_metrics = new ModelMetricsBaseV3[modelKeys.size()];
        validation_metrics = new ModelMetricsBaseV3[modelKeys.size()];
        cross_validation_metrics = new ModelMetricsBaseV3[modelKeys.size()];
        cross_validation_metrics_summary = new TwoDimTableV3[modelKeys.size()];
        for (int i = 0; i < modelKeys.size(); i++) {
            Model m = DKV.getGet(modelKeys.get(i));
            if (m != null) {
                Model.Output o = m._output;
                if (null != o._training_metrics)
                    training_metrics[i] = (ModelMetricsBaseV3) SchemaServer.schema(3, o._training_metrics).fillFromImpl(o._training_metrics);
                if (null != o._validation_metrics)
                    validation_metrics[i] = (ModelMetricsBaseV3) SchemaServer.schema(3, o._validation_metrics).fillFromImpl(o._validation_metrics);
                if (null != o._cross_validation_metrics)
                    cross_validation_metrics[i] = (ModelMetricsBaseV3) SchemaServer.schema(3, o._cross_validation_metrics).fillFromImpl(o._cross_validation_metrics);
                if (o._cross_validation_metrics_summary != null)
                    cross_validation_metrics_summary[i] = new TwoDimTableV3(o._cross_validation_metrics_summary);
            }
        }
    }
    KeyV3.ModelKeyV3[] modelIds = new KeyV3.ModelKeyV3[modelKeys.size()];
    Key<Model>[] keys = new Key[modelKeys.size()];
    for (int i = 0; i < modelIds.length; i++) {
        modelIds[i] = new KeyV3.ModelKeyV3(modelKeys.get(i));
        keys[i] = modelIds[i].key();
    }
    grid_id = new KeyV3.GridKeyV3(grid._key);
    model_ids = modelIds;
    hyper_names = grid.getHyperNames();
    failed_params = toModelParametersSchema(grid.getFailedParameters());
    failure_details = grid.getFailureDetails();
    failure_stack_traces = grid.getFailureStackTraces();
    failed_raw_params = grid.getFailedRawParameters();
    TwoDimTable t = grid.createSummaryTable(keys, sort_by, decreasing);
    if (t != null)
        summary_table = new TwoDimTableV3().fillFromImpl(t);
    TwoDimTable h = grid.createScoringHistoryTable();
    if (h != null)
        scoring_history = new TwoDimTableV3().fillFromImpl(h);
    return this;
}
Also used : ArrayList(java.util.ArrayList) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) TwoDimTable(water.util.TwoDimTable) Model(hex.Model) Key(water.Key)
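
Before doing anything with the grid's model keys, fillFromImpl filters out keys whose models no longer exist in the DKV. That defensive pattern is reusable on its own; a short sketch (class and method names are illustrative):

import java.util.ArrayList;
import java.util.List;

import hex.Model;
import water.DKV;
import water.Key;

public class LiveModelKeysSketch {
    // Keep only the model keys whose objects still resolve in the DKV.
    // Callers must still expect a key to go stale afterwards, as noted above.
    static List<Key<Model>> liveKeys(Key<Model>[] gridModelKeys) {
        List<Key<Model>> modelKeys = new ArrayList<>(gridModelKeys.length);
        for (Key<Model> k : gridModelKeys) {
            if (k != null && DKV.get(k) != null)
                modelKeys.add(k);
        }
        return modelKeys;
    }
}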

Example 53 with Key

Use of water.Key in project h2o-3 by h2oai.

The class ParseFolderTest, method testSameFile.

@Test
public void testSameFile() {
    File f = FileUtils.locateFile("smalldata/iris/iris_wheader.csv");
    NFSFileVec nfs1 = NFSFileVec.make(f);
    NFSFileVec nfs2 = NFSFileVec.make(f);
    Frame fr = null;
    try {
        fr = ParseDataset.parse(Key.make(), new Key[] { nfs1._key, nfs2._key },
                false /* delete on done */, false, ParseSetup.GUESS_HEADER);
    } finally {
        if (fr != null)
            fr.delete();
        if (nfs1 != null)
            nfs1.remove();
    }
}
Also used : Frame(water.fvec.Frame) NFSFileVec(water.fvec.NFSFileVec) File(java.io.File) Key(water.Key)
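
The same make/parse/delete pattern works for a single file: wrap the file in an NFSFileVec, hand its key to ParseDataset.parse under a destination Key, and clean up in a finally block. A minimal sketch, with the file path argument and destination key name as illustrative assumptions:

import java.io.File;

import water.Key;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.parser.ParseDataset;

public class SingleFileParseSketch {
    // Parse one CSV file into a Frame under a named destination Key,
    // then delete both the Frame and the backing file vec.
    static void parseAndDrop(String path) {
        NFSFileVec nfs = NFSFileVec.make(new File(path));
        Frame fr = null;
        try {
            fr = ParseDataset.parse(Key.make("parsed.hex"), nfs._key);
        } finally {
            if (fr != null) fr.delete();
            if (nfs != null) nfs.remove();
        }
    }
}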

Example 54 with Key

Use of water.Key in project h2o-3 by h2oai.

The class ParseTimeTest, method testDayParseNoTime2.

@Test
public void testDayParseNoTime2() {
    DateTimeZone pst = DateTimeZone.forID("America/Los_Angeles");
    DateTimeZone localTZ = DateTimeZone.getDefault();
    // Just mm/dd/yyyy, no time
    String data = "Date\n" + // Note evil trailing blanks
    "1/23/2014  \n" + "1/24/2014  \n" + "1/23/2014 \n" + "1/24/2014\n";
    Key k1 = ParserTest.makeByteVec(data);
    Key r1 = Key.make("r1");
    Frame fr = ParseDataset.parse(r1, k1);
    Assert.assertTrue(fr.vec(0).get_type_str().equals("Time"));
    // Date column as epoch ms; note these counts all presume PST
    long[] exp = new long[] {
        1390464000000L, 1390550400000L, 1390464000000L, 1390550400000L
    };
    // Adjust exp[] to local time
    for (int i = 0; i < exp.length; i++)
        exp[i] += pst.getOffset(exp[i]) - localTZ.getOffset(exp[i]);
    Vec vec = fr.vec("Date");
    for (int i = 0; i < exp.length; i++) Assert.assertEquals(exp[i], vec.at8(i));
    fr.delete();
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) Key(water.Key)
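
The only non-obvious step in testDayParseNoTime2 is shifting the expected PST epoch milliseconds into whatever timezone the test machine uses, done with Joda-Time offsets. That adjustment in isolation (class and method names are illustrative):

import org.joda.time.DateTimeZone;

public class TimeZoneShiftSketch {
    // Shift epoch milliseconds computed for PST so they line up with the
    // machine's default timezone, as the test does for its expected values.
    static long shiftPstToLocal(long msPst) {
        DateTimeZone pst = DateTimeZone.forID("America/Los_Angeles");
        DateTimeZone local = DateTimeZone.getDefault();
        return msPst + pst.getOffset(msPst) - local.getOffset(msPst);
    }
}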

Example 55 with Key

Use of water.Key in project h2o-3 by h2oai.

The class ParseTimeTest, method testTimeParseNoDate.

@Test
public void testTimeParseNoDate() {
    // Just time, no date.  Parses as msec on the Unix Epoch start date, i.e. <24*60*60*1000
    // HH:mm:ss.S          // tenths second
    // HH:mm:ss.SSS        // milliseconds
    // HH:mm:ss.SSSnnnnnn  // micros and nanos also
    String data = "Time\n" + "0:0:0.0\n" + "0:54:13.0\n" + "10:36:2.0\n" + "10:36:8.0\n" + "10:37:49.0\n" + "11:18:48.0\n" + "11:41:34.0\n" + "11:4:49.0\n" + "12:47:41.0\n" + "3:24:19.0\n" + "3:45:55.0\n" + "3:45:56.0\n" + "3:58:24.0\n" + "6:13:55.0\n" + "6:25:14.0\n" + "7:0:15.0\n" + "7:3:8.0\n" + "8:20:8.0\n";
    Key k1 = ParserTest.makeByteVec(data);
    Key r1 = Key.make("r1");
    Frame fr = ParseDataset.parse(r1, k1);
    Assert.assertTrue(fr.vec(0).get_type_str().equals("Time"));
    // Notice: no TZ at all ==> GMT!
    long[] exp = new long[] {
        0L, 3253000L, 38162000L, 38168000L, 38269000L, 40728000L, 42094000L, 39889000L,
        46061000L, 12259000L, 13555000L, 13556000L, 14304000L, 22435000L, 23114000L,
        25215000L, 25388000L, 30008000L
    };
    Vec vec = fr.vec("Time");
    for (int i = 0; i < exp.length; i++) Assert.assertEquals(exp[i], vec.at8(i));
    fr.delete();
}
Also used : Key(water.Key)
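
Both time-parsing tests follow the same recipe: build an in-memory byte vec from a CSV string with ParserTest.makeByteVec, parse it under a named Key, and compare a column's long values. A generic version of that check, assuming it lives next to ParserTest so the makeByteVec helper is visible (the class and method names are illustrative):

import org.junit.Assert;

import water.Key;
import water.fvec.Frame;
import water.fvec.Vec;
import water.parser.ParseDataset;

public class InMemoryParseCheckSketch {
    // Parse an in-memory CSV string and compare one column against the
    // expected long values, deleting the Frame when done.
    static void assertColumnEquals(String csv, String column, long[] expected) {
        Key raw = ParserTest.makeByteVec(csv);   // test helper used in the examples above
        Frame fr = ParseDataset.parse(Key.make("checked.hex"), raw);
        try {
            Vec vec = fr.vec(column);
            for (int i = 0; i < expected.length; i++)
                Assert.assertEquals(expected[i], vec.at8(i));
        } finally {
            fr.delete();
        }
    }
}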

Aggregations

Key (water.Key): 94
Frame (water.fvec.Frame): 56
Test (org.junit.Test): 42
Vec (water.fvec.Vec): 21
File (java.io.File): 18
NFSFileVec (water.fvec.NFSFileVec): 17
Futures (water.Futures): 10
Random (java.util.Random): 7
H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 6
ValFrame (water.rapids.vals.ValFrame): 6
DateTimeZone (org.joda.time.DateTimeZone): 5
Model (hex.Model): 4
SplitFrame (hex.SplitFrame): 4
DeepLearning (hex.deeplearning.DeepLearning): 4
DeepLearningModel (hex.deeplearning.DeepLearningModel): 4
AppendableVec (water.fvec.AppendableVec): 4
NewChunk (water.fvec.NewChunk): 4
Grid (hex.grid.Grid): 3
IOException (java.io.IOException): 3
ArrayList (java.util.ArrayList): 3