Example 46 with Key

Use of water.Key in project h2o-2 by h2oai.

From the class FrameUtils, method parseFrame.

/** Parse given file into the form of frame represented by the given key.
   *
   * @param okey  destination key for parsed frame
   * @param files  files to parse
   * @return a new frame
   */
public static Frame parseFrame(Key okey, File... files) {
    assert files.length > 0 : "Oops. No files to parse!";
    for (File f : files) if (!f.exists())
        throw new RuntimeException("File not found " + f);
    // Create output key if not specified
    if (okey == null)
        okey = Key.make(files[0].getName());
    Key[] fkeys = new Key[files.length];
    int cnt = 0;
    for (File f : files) fkeys[cnt++] = NFSFileVec.make(f);
    return parseFrame(okey, fkeys);
}
Also used: File (java.io.File), Key (water.Key)
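
A minimal usage sketch of the helper above. The CSV path is hypothetical, a running H2O cloud is assumed, and the import of FrameUtils itself is omitted (assume this class sits in the same package):

import java.io.File;
import water.fvec.Frame;

public class ParseFrameExample {
    public static void main(String[] args) {
        // Hypothetical path; the file must be visible to the H2O node.
        File csv = new File("smalldata/iris.csv");
        // A null destination key makes parseFrame derive one from the file name.
        Frame fr = FrameUtils.parseFrame(null, csv);
        System.out.println("Parsed " + fr.numRows() + " rows into " + fr._key);
    }
}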

Example 47 with Key

Use of water.Key in project h2o-3 by h2oai.

From the class AvroParserProvider, method createParserSetup.

@Override
public ParseSetup createParserSetup(Key[] inputs, ParseSetup requiredSetup) {
    // Expect at least one input; the files are also assumed to be uncompressed.
    assert inputs != null && inputs.length > 0 : "Inputs cannot be empty!";
    Key firstInput = inputs[0];
    Iced ice = DKV.getGet(firstInput);
    if (ice == null)
        throw new H2OIllegalArgumentException("Missing data", "Did not find any data under key " + firstInput);
    ByteVec bv = (ByteVec) (ice instanceof ByteVec ? ice : ((Frame) ice).vecs()[0]);
    byte[] bits = bv.getFirstBytes();
    try {
        AvroParser.AvroInfo avroInfo = AvroParser.extractAvroInfo(bits, requiredSetup);
        return new AvroParser.AvroParseSetup(requiredSetup, avroInfo.header, avroInfo.firstBlockSize, avroInfo.domains);
    } catch (Throwable e) {
        throw new H2OIllegalArgumentException("Wrong data", "Cannot find Avro header in input file: " + firstInput, e);
    }
}
Also used: Frame (water.fvec.Frame), H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException), Iced (water.Iced), ByteVec (water.fvec.ByteVec), Key (water.Key)
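
The unwrapping idiom in the middle of createParserSetup is worth isolating. A hypothetical helper (not part of the h2o-3 API) that mirrors it:

import water.DKV;
import water.Iced;
import water.Key;
import water.exceptions.H2OIllegalArgumentException;
import water.fvec.ByteVec;
import water.fvec.Frame;

final class RawBytes {
    // A DKV key may resolve to a raw ByteVec, or to a Frame whose first
    // vec carries the raw bytes; either way, return the ByteVec.
    static ByteVec of(Key k) {
        Iced ice = DKV.getGet(k);
        if (ice == null)
            throw new H2OIllegalArgumentException("Missing data", "No data under key " + k);
        return (ByteVec) (ice instanceof ByteVec ? ice : ((Frame) ice).vecs()[0]);
    }
}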

Example 48 with Key

Use of water.Key in project h2o-3 by h2oai.

From the class PersistHdfs, method load.

/** InputStream from an HDFS-based Key. */
/*public static InputStream openStream(Key k, Job pmon) throws IOException {
    H2OHdfsInputStream res = null;
    Path p = new Path(k.toString());
    try {
      res = new H2OHdfsInputStream(p, 0, pmon);
    } catch( IOException e ) {
      try {
        Thread.sleep(1000);
      } catch( Exception ex ) {}
      Log.warn("Error while opening HDFS key " + k.toString() + ", will wait and retry.");
      res = new H2OHdfsInputStream(p, 0, pmon);
    }
    return res;
  }*/
@Override
public byte[] load(final Value v) {
    //
    // !!! WARNING !!!
    //
    // tomk: Sun Apr 19 13:11:51 PDT 2015
    //
    //
    // This load implementation behaved *HORRIBLY* with S3 when the libraries were updated.
    //    Behaves well (and is the same set of libraries as H2O-1):
    //        org.apache.hadoop:hadoop-client:2.0.0-cdh4.3.0
    //        net.java.dev.jets3t:jets3t:0.6.1
    //
    //    Behaves abysmally:
    //        org.apache.hadoop:hadoop-client:2.5.0-cdh5.2.0
    //        net.java.dev.jets3t:jets3t:0.9.2
    //
    //
    // I did some debugging.
    //
    // What happens in the new libraries is the connection type is a streaming connection, and
    // the entire file gets read on close() even if you only wanted to read a chunk.  The result
    // is the same data gets read over and over again by the underlying transport layer even
    // though H2O only thinks it's asking for (and receiving) each piece of data once.
    //
    // I suspect this has something to do with the 'Range' HTTP header on the GET, but I'm not
    // entirely sure.  Many layers of library need to be fought through to really figure it out.
    //
    // Anyway, this will need to be rewritten from the perspective of how to properly use the
    // new library version.  Might make sense to go to straight to 's3a' which is a replacement
    // for 's3n'.
    //
    long end, start = System.currentTimeMillis();
    final byte[] b = MemoryManager.malloc1(v._max);
    Key k = v._key;
    long skip = k.isChunkKey() ? water.fvec.NFSFileVec.chunkOffset(k) : 0;
    final Path p = _iceRoot == null ? new Path(getPathForKey(k)) : new Path(_iceRoot, getIceName(v));
    final long skip_ = skip;
    run(new Callable() {

        @Override
        public Object call() throws Exception {
            FileSystem fs = FileSystem.get(p.toUri(), CONF);
            FSDataInputStream s = null;
            try {
                //          fs.getDefaultBlockSize(p);
                s = fs.open(p);
                //          System.out.println(Arrays.toString(bs));
                if (p.toString().toLowerCase().startsWith("maprfs:")) {
                    // MapR behaves really horribly with the google ByteStreams code below.
                    // Instead of skipping by seeking, it skips by reading and dropping.  Very bad.
                    // Use the HDFS API here directly instead.
                    s.seek(skip_);
                    s.readFully(b);
                } else {
                    // NOTE:
                    // The following line degrades performance of HDFS load from S3 API: s.readFully(skip,b,0,b.length);
                    // Google API's simple seek has better performance
                    // Load of 300MB file via Google API ~ 14sec, via s.readFully ~ 5min (under the same condition)
                    //            ByteStreams.skipFully(s, skip_);
                    //            ByteStreams.readFully(s, b);
                    s.seek(skip_);
                    s.readFully(b);
                }
                assert v.isPersisted();
            } finally {
                // Guard against fs.open() having thrown before s was assigned.
                if (s != null)
                    s.getWrappedStream().close();
                FileUtils.close(s);
            }
            return null;
        }
    }, true, v._max);
    end = System.currentTimeMillis();
    // Only log reads that took over 1 second to complete
    if (end - start > 1000)
        Log.debug("Slow Read: " + (end - start) + " millis to get bytes " + skip_ + "-" + (skip_ + b.length) + " in HDFS read.");
    return b;
}
Also used: Key (water.Key), FileVec.getPathForKey (water.fvec.FileVec.getPathForKey), Callable (java.util.concurrent.Callable), URISyntaxException (java.net.URISyntaxException), SocketTimeoutException (java.net.SocketTimeoutException), HDFSIOException (water.api.HDFSIOException), IOException (java.io.IOException), EOFException (java.io.EOFException)
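
A standalone sketch of the seek-then-readFully pattern the snippet settles on (the URI is hypothetical). Per the comments above, an explicit seek() performed far better over the s3n connector than the positioned readFully(offset, ...) overload:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RangedHdfsRead {
    // Read len bytes starting at offset from any Hadoop-visible URI.
    public static byte[] readRange(String uri, long offset, int len) throws Exception {
        Path p = new Path(uri);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        byte[] buf = new byte[len];
        try (FSDataInputStream in = fs.open(p)) {
            in.seek(offset);   // reposition without draining the stream
            in.readFully(buf); // fill the buffer completely or fail with EOF
        }
        return buf;
    }
}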

Example 49 with Key

Use of water.Key in project h2o-3 by h2oai.

From the class GridsHandler, method list.

/**
   * Return all the grids.
   */
// called through reflection by RequestServer
@SuppressWarnings("unused")
public GridsV99 list(int version, GridsV99 s) {
    final Key[] gridKeys = KeySnapshot.globalSnapshot().filter(new KeySnapshot.KVFilter() {

        @Override
        public boolean filter(KeySnapshot.KeyInfo k) {
            return Value.isSubclassOf(k._type, Grid.class);
        }
    }).keys();
    s.grids = new GridSchemaV99[gridKeys.length];
    for (int i = 0; i < gridKeys.length; i++) {
        s.grids[i] = new GridSchemaV99();
        s.grids[i].fillFromImpl(getFromDKV("(none)", gridKeys[i], Grid.class));
    }
    return s;
}
Also used: Grid (hex.grid.Grid), GridSchemaV99 (hex.schemas.GridSchemaV99), Key (water.Key)
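
The same KeySnapshot filtering idiom generalizes to any Value type. A sketch (helper class and method names are hypothetical) that collects Frame keys instead of Grid keys:

import water.Key;
import water.KeySnapshot;
import water.Value;
import water.fvec.Frame;

final class StoreQueries {
    // Point-in-time view of the distributed store, narrowed to Frame keys.
    static Key[] allFrameKeys() {
        return KeySnapshot.globalSnapshot().filter(new KeySnapshot.KVFilter() {
            @Override
            public boolean filter(KeySnapshot.KeyInfo k) {
                return Value.isSubclassOf(k._type, Frame.class);
            }
        }).keys();
    }
}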

Example 50 with Key

Use of water.Key in project h2o-3 by h2oai.

From the class ParseHandler, method parse.

// Entry point for parsing.
// called through reflection by RequestServer
@SuppressWarnings("unused")
public ParseV3 parse(int version, ParseV3 parse) {
    ParserInfo parserInfo = ParserService.INSTANCE.getByName(parse.parse_type).info();
    ParseSetup setup = new ParseSetup(
            parserInfo, parse.separator, parse.single_quotes, parse.check_header,
            parse.number_columns, delNulls(parse.column_names),
            ParseSetup.strToColumnTypes(parse.column_types),
            parse.domains, parse.na_strings, null,
            new ParseWriter.ParseErr[0], parse.chunk_size);
    if (parse.source_frames == null)
        throw new H2OIllegalArgumentException("Data for Frame '" + parse.destination_frame.name + "' is not available. Please check that the path is valid (for all H2O nodes).");
    Key[] srcs = new Key[parse.source_frames.length];
    for (int i = 0; i < parse.source_frames.length; i++) srcs[i] = parse.source_frames[i].key();
    parse.job = new JobV3(ParseDataset.parse(parse.destination_frame.key(), srcs, parse.delete_on_done, setup, parse.blocking)._job);
    if (parse.blocking) {
        Frame fr = DKV.getGet(parse.destination_frame.key());
        parse.rows = fr.numRows();
    }
    return parse;
}
Also used: Frame (water.fvec.Frame), ParseSetup (water.parser.ParseSetup), H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException), ParseWriter (water.parser.ParseWriter), ParserInfo (water.parser.ParserInfo), JobV3 (water.api.schemas3.JobV3), Key (water.Key)
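
For comparison, a minimal sketch of the same parse flow driven directly, the way h2o-3 tests commonly do it (the file path and destination key name are hypothetical): load the file's bytes into the store, then hand the key to ParseDataset.parse, which blocks until the frame is built:

import java.io.File;
import water.Key;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.parser.ParseDataset;

public class DirectParseExample {
    public static Frame parseCsv(String path) {
        // Registers the file's raw bytes in the DKV as a ByteVec.
        NFSFileVec nfs = NFSFileVec.make(new File(path));
        // Guesses the setup and parses; blocks until "parsed.hex" is ready.
        return ParseDataset.parse(Key.make("parsed.hex"), nfs._key);
    }
}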

Aggregations

Key (water.Key): 94
Frame (water.fvec.Frame): 56
Test (org.junit.Test): 42
Vec (water.fvec.Vec): 21
File (java.io.File): 18
NFSFileVec (water.fvec.NFSFileVec): 17
Futures (water.Futures): 10
Random (java.util.Random): 7
H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 6
ValFrame (water.rapids.vals.ValFrame): 6
DateTimeZone (org.joda.time.DateTimeZone): 5
Model (hex.Model): 4
SplitFrame (hex.SplitFrame): 4
DeepLearning (hex.deeplearning.DeepLearning): 4
DeepLearningModel (hex.deeplearning.DeepLearningModel): 4
AppendableVec (water.fvec.AppendableVec): 4
NewChunk (water.fvec.NewChunk): 4
Grid (hex.grid.Grid): 3
IOException (java.io.IOException): 3
ArrayList (java.util.ArrayList): 3