use of water.parser.BufferedString in project h2o-3 by h2oai.
the class IcedHasMapGenericTest method testSerialization.
/**
 * Round-trips an {@code IcedHashMapGeneric} holding several key/value type combinations through
 * an {@code AutoBuffer} and checks that the deserialized map has the same size and that every
 * original entry maps to an equal value in the copy.
 */
@Test
public void testSerialization() {
  IcedHashMapGeneric m = new IcedHashMapGeneric();
  // String -> String pair
  m.put("haha", "gaga");
  // String -> Freezable pair
  m.put("str->freezable", new IcedDouble(3.14));
  // String -> Freezable[] pair
  m.put("str->freezable[]", new Freezable[] { new IcedDouble(3.14) });
  // String -> Integer pair
  m.put("str->Integer", 314);
  // BufferedString -> String pair
  m.put(new BufferedString("haha2"), "gaga");
  // BufferedString -> Freezable pair
  m.put(new BufferedString("str->freezable2"), new IcedDouble(3.14));
  // BufferedString -> Freezable[] pair
  m.put(new BufferedString("str->freezable[]2"), new Freezable[] { new IcedDouble(3.14) });
  // BufferedString -> Integer pair
  m.put(new BufferedString("str->Integer2"), 314);
  // IcedLong -> Integer pair
  m.put(new IcedLong(1234), 1234);

  // Serialize into a byte buffer and read the map back from those bytes.
  byte[] buf = new AutoBuffer().put(m).buf();
  IcedHashMapGeneric m2 = new AutoBuffer(buf).get();

  assertEquals(m.size(), m2.size());
  Set<Map.Entry> entries = m.entrySet();
  for (Map.Entry e : entries) {
    if (e.getValue() instanceof Freezable[]) {
      // Arrays do not override equals(), so array values are compared element-wise.
      Freezable[] expected = (Freezable[]) e.getValue();
      Freezable[] actual = (Freezable[]) m2.get(e.getKey());
      // Explicit throw instead of the `assert` keyword: `assert` is a no-op unless the JVM
      // runs with -ea, which would let a broken round-trip pass silently.
      if (!Arrays.deepEquals(expected, actual)) {
        throw new AssertionError("Array value mismatch for key " + e.getKey()
            + ": expected " + Arrays.toString(expected) + " but was " + Arrays.toString(actual));
      }
    } else {
      assertEquals(e.getValue(), m2.get(e.getKey()));
    }
  }
}
use of water.parser.BufferedString in project h2o-3 by h2oai.
the class TestUtil method assertStringVecEquals.
/**
 * Asserts that two string vectors have the same length, the same type string and equal string
 * values at every row.
 *
 * <p>On the first mismatching row the failure message reports the row index, both values, the
 * chunk index of the row in each vector and the row's position inside its chunk.
 *
 * @param expecteds vector holding the expected values
 * @param actuals   vector holding the values under test
 */
public static void assertStringVecEquals(Vec expecteds, Vec actuals) {
  assertEquals(expecteds.length(), actuals.length());
  assertEquals("Vec types match", expecteds.get_type_str(), actuals.get_type_str());
  final long rows = expecteds.length();
  // long index: an int counter would overflow before reaching the end of a vector with more
  // than Integer.MAX_VALUE rows.
  for (long i = 0; i < rows; i++) {
    String expected = toStr(expecteds.atStr(new BufferedString(), i));
    String actual = toStr(actuals.atStr(new BufferedString(), i));
    boolean same = expected == null ? actual == null : expected.equals(actual);
    if (same) {
      continue;
    }
    // The diagnostic needs chunk lookups on both vectors, so build it only for a failing row.
    final String message = i + ": " + expected + " != " + actual + ", chunkIds = " + expecteds.elem2ChunkIdx(i) + ", " + actuals.elem2ChunkIdx(i) + ", row in chunks = " + (i - expecteds.chunkForRow(i).start()) + ", " + (i - actuals.chunkForRow(i).start());
    assertEquals(message, expected, actual);
  }
}
use of water.parser.BufferedString in project h2o-3 by h2oai.
the class VecUtils method stringToCategorical.
/**
 * Builds a categorical {@link Vec} out of a string {@link Vec}.
 *
 * <p>The domain is collected from the input first; each non-NA string is then replaced by the
 * index of that string within the collected domain, and NA rows stay NA.
 *
 * FIXME: implement in more efficient way with Brandon's primitives for BufferedString manipulation
 *
 * @param vec a string {@link Vec}
 * @return a categorical {@link Vec} whose domain is the one collected from {@code vec}
 */
public static Vec stringToCategorical(Vec vec) {
  final String[] vecDomain = new CollectStringVecDomain().domain(vec);
  MRTask task = new MRTask() {
    // Domain string -> level index; transient and rebuilt in setupLocal().
    private transient java.util.HashMap<String, Integer> lookupTable;

    @Override
    protected void setupLocal() {
      lookupTable = new java.util.HashMap<>(vecDomain.length);
      int level = 0;
      for (String label : vecDomain) {
        // FIXME: boxing
        lookupTable.put(label, level++);
      }
    }

    @Override
    public void map(Chunk c, NewChunk nc) {
      // One scratch holder reused for every row of the chunk.
      final BufferedString scratch = new BufferedString();
      for (int r = 0; r < c.len(); r++) {
        if (c.isNA(r)) {
          nc.addNA();
          continue;
        }
        c.atStr(scratch, r);
        Integer level = lookupTable.get(scratch.bytesToString());
        nc.addNum(level, 0);
      }
    }
  };
  // Run the task: one input vector in, one categorical output vector out
  final byte[] outputTypes = { Vec.T_CAT };
  task.doAll(outputTypes, vec);
  // Attach the collected domain to the single output column and return it
  final String[][] outputDomains = { vecDomain };
  return task.outputFrame(null, null, outputDomains).vec(0);
}
use of water.parser.BufferedString in project h2o-3 by h2oai.
the class CStrChunk method asciiLStrip.
/**
 * Left-strips every string of this chunk in a single pass over the chunk's byte buffer.
 *
 * <p>For each row, leading bytes for which {@code intersects(byte, set)} holds are skipped and
 * the remainder, up to the terminating zero byte, is appended to {@code nc}. Missing rows are
 * appended as NA, so {@code nc} receives exactly one entry per row of this chunk.
 *
 * @param nc NewChunk to be filled with strip version of strings in this chunk
 * @param set chars to strip, treated as ASCII
 * @return Filled NewChunk
 */
public NewChunk asciiLStrip(NewChunk nc, String set) {
  // Reusable holder, re-pointed at each stripped string before it is handed to nc.
  BufferedString bs = new BufferedString();
  for (int i = 0; i < _len; i++) {
    // Test the stored offset against NA *before* adding _valstart; once the base is added the
    // sentinel can no longer be recognised (unless _valstart happens to be 0).
    // NOTE(review): assumes NA is the value written in the offset table for missing rows.
    int rawOff = UnsafeUtils.get4(_mem, idx(i));
    if (rawOff == NA) {
      nc.addNA();
      continue;
    }
    int off = _valstart + rawOff;
    // Skip leading strip characters, but never step past the zero terminator.
    while (_mem[off] != 0 && intersects(_mem[off], set)) off++;
    // Length of what is left, up to the zero terminator.
    int j = 0;
    while (_mem[off + j] != 0) j++;
    nc.addStr(bs.set(_mem, off, j));
  }
  return nc;
}
use of water.parser.BufferedString in project h2o-3 by h2oai.
the class CStrChunk method asciiRStrip.
/**
 * Right-strips every string of this chunk in a single pass over the chunk's byte buffer.
 *
 * <p>For each row, the zero terminator is located and trailing bytes for which
 * {@code intersects(byte, set)} holds are dropped; the remaining prefix is appended to
 * {@code nc}. A string made up only of strip characters becomes the empty string. Missing rows
 * are appended as NA, so {@code nc} receives exactly one entry per row of this chunk.
 *
 * @param nc NewChunk to be filled with strip version of strings in this chunk
 * @param set chars to strip, treated as ASCII
 * @return Filled NewChunk
 */
public NewChunk asciiRStrip(NewChunk nc, String set) {
  // Reusable holder, re-pointed at each stripped string before it is handed to nc.
  BufferedString bs = new BufferedString();
  for (int i = 0; i < _len; i++) {
    // Test the stored offset against NA *before* adding _valstart; once the base is added the
    // sentinel can no longer be recognised (unless _valstart happens to be 0).
    // NOTE(review): assumes NA is the value written in the offset table for missing rows.
    int rawOff = UnsafeUtils.get4(_mem, idx(i));
    if (rawOff == NA) {
      nc.addNA();
      continue;
    }
    int off = _valstart + rawOff;
    // Find end
    int j = 0;
    while (_mem[off + j] != 0) j++;
    // March back while char in set; the j > 0 bound stops the scan at the start of this
    // string instead of running into the bytes that precede it.
    while (j > 0 && intersects(_mem[off + j - 1], set)) j--;
    bs.set(_mem, off, j);
    nc.addStr(bs);
  }
  return nc;
}
Aggregations