use of water.util.IcedLong in project h2o-3 by h2oai.
the class CreateInteractions method makeDomain.
/**
 * Builds the string domain of an interaction column from pair-wise occurrence counts.
 * <p>
 * Every key of {@code unsortedMap} packs two categorical level indices into one long: the index
 * into {@code dA} sits in the upper 32 bits and the index into {@code dB} in the lower 32 bits.
 * Every value is the number of times that pair occurred. The pairs are visited in the order
 * produced by {@code mySort} and turned into level names of the form {@code "<A>_<B>"}
 * (or just {@code "<B>"} when {@code dA} and {@code dB} are the same array instance). An index
 * equal to {@code _missing} is rendered as {@code "NA"}.
 * <p>
 * Collection stops at the first pair for which either {@code _ci._max_factors} levels have
 * already been taken or the count is below {@code _ci._min_occurrence}. If that leaves pairs
 * unused, {@code _other} is appended as a final level and {@code _sortedMap} is cut down to
 * the pairs that were kept.
 *
 * @param unsortedMap packed (levelA, levelB) pair -> occurrence count
 * @param dA          domain of the first categorical column
 * @param dB          domain of the second categorical column
 * @return the interaction domain, possibly ending with the {@code _other} level
 */
public String[] makeDomain(Map<IcedLong, IcedLong> unsortedMap, String[] dA, String[] dB) {
  _sortedMap = mySort(unsortedMap);

  final int total = _sortedMap.size();
  final boolean twoColumns = dA != dB; // identity check: same array means a self-interaction
  final String[] levels = new String[total];
  int kept = 0;

  for (Object o : _sortedMap.entrySet()) {
    final Map.Entry entry = (Map.Entry) o;
    final long packed = (Long) entry.getKey();
    final long occurrences = (Long) entry.getValue();

    // Stop at the first pair that exceeds the level budget or is too rare.
    if (kept >= _ci._max_factors || occurrences < _ci._min_occurrence) {
      break;
    }

    String name = "";
    if (twoColumns) {
      final int idxA = (int) (packed >> 32);
      name = (idxA == _missing ? "NA" : dA[idxA]) + "_";
    }
    final int idxB = (int) packed;
    name += (idxB == _missing ? "NA" : dB[idxB]);

    levels[kept++] = name;
  }

  // Every pair made it into the domain: nothing to truncate.
  if (kept == total) {
    return levels;
  }

  // Some pairs were dropped: shrink the domain and add the catch-all level.
  final String[] truncated = new String[kept + 1];
  System.arraycopy(levels, 0, truncated, 0, kept);
  truncated[kept] = _other;

  // Keep only the retained pairs in the sorted map, preserving their order.
  final Map head = new LinkedHashMap<>();
  final Iterator entries = _sortedMap.entrySet().iterator();
  for (int i = 0; i < kept; i++) {
    final Map.Entry entry = (Map.Entry) entries.next();
    head.put(entry.getKey(), entry.getValue());
  }
  _sortedMap = head;

  return truncated;
}
use of water.util.IcedLong in project h2o-3 by h2oai.
the class WordCountTask method read_impl.
/**
 * Rebuilds {@code _counts} from its serialized form in {@code ab}.
 * <p>
 * The stream is a sequence of records, each a 2-byte length, that many word bytes, and an
 * 8-byte count. A length value of 65535 marks the end of the map.
 *
 * @param ab buffer positioned at the start of the serialized map
 * @return this task, with {@code _counts} replaced by the decoded map
 */
public final WordCountTask read_impl(AutoBuffer ab) {
  _counts = new IcedHashMap<>();
  int len;
  while ((len = ab.get2()) != 65535) {
    // Read until end-of-map marker
    byte[] bs = ab.getA1(len);
    long cnt = ab.get8();
    // Decode with an explicit charset: new String(byte[]) would use the platform default,
    // so the same bytes could become different keys on differently configured JVMs.
    // NOTE(review): assumes the word bytes are UTF-8 - confirm against the matching write_impl.
    String word = new String(bs, java.nio.charset.StandardCharsets.UTF_8);
    _counts.put(new BufferedString(word), new IcedLong(cnt));
  }
  return this;
}
use of water.util.IcedLong in project h2o-3 by h2oai.
the class IcedHasMapGenericTest method testSerialization.
/**
 * Round-trips an {@code IcedHashMapGeneric} holding a mix of key and value types through an
 * {@code AutoBuffer} and checks that every entry comes back equal.
 */
@Test
public void testSerialization() {
  IcedHashMapGeneric m = new IcedHashMapGeneric();
  // String -> String pair
  m.put("haha", "gaga");
  // String -> IcedDouble pair
  m.put("str->freezable", new IcedDouble(3.14));
  // String -> Freezable[] pair
  m.put("str->freezable[]", new Freezable[] { new IcedDouble(3.14) });
  // String -> Integer pair
  m.put("str->Integer", 314);
  // BufferedString -> String pair
  m.put(new BufferedString("haha2"), "gaga");
  // BufferedString -> IcedDouble pair
  m.put(new BufferedString("str->freezable2"), new IcedDouble(3.14));
  // BufferedString -> Freezable[] pair
  m.put(new BufferedString("str->freezable[]2"), new Freezable[] { new IcedDouble(3.14) });
  // BufferedString -> Integer pair
  m.put(new BufferedString("str->Integer2"), 314);
  // IcedLong -> Integer pair
  m.put(new IcedLong(1234), 1234);

  byte[] buf = new AutoBuffer().put(m).buf();
  IcedHashMapGeneric m2 = new AutoBuffer(buf).get();

  assertEquals(m.size(), m2.size());
  Set<Map.Entry> entries = m.entrySet();
  for (Map.Entry e : entries) {
    if (e.getValue() instanceof Freezable[]) {
      // Arrays do not override equals(), so compare element-wise. This goes through a JUnit
      // assertion rather than the `assert` keyword, which is skipped when the JVM runs
      // without -ea and would let a broken round-trip pass silently.
      Freezable[] expected = (Freezable[]) e.getValue();
      Freezable[] actual = (Freezable[]) m2.get(e.getKey());
      assertEquals("array value mismatch for key " + e.getKey()
          + ": expected " + Arrays.toString(expected) + " but was " + Arrays.toString(actual),
          true, Arrays.deepEquals(expected, actual));
    } else {
      assertEquals(e.getValue(), m2.get(e.getKey()));
    }
  }
}
use of water.util.IcedLong in project h2o-3 by h2oai.
the class CreateInteractions method mySort.
/**
 * Returns the entries of {@code unsortMap} ordered by descending count.
 * <p>
 * Keys and values are unwrapped from {@code IcedLong} to {@code Long}. The sort is stable, so
 * entries with equal counts keep the relative order in which {@code unsortMap} iterates them.
 *
 * @param unsortMap packed key -> occurrence count
 * @return a new insertion-ordered map, most frequent entry first
 */
private static Map<Long, Long> mySort(Map<IcedLong, IcedLong> unsortMap) {
  // Copy the entries so they can be sorted without touching the input map.
  List<Map.Entry<IcedLong, IcedLong>> list = new java.util.ArrayList<>(unsortMap.entrySet());
  // Sort by value, largest count first (arguments are swapped to get descending order).
  Collections.sort(list, new Comparator<Map.Entry<IcedLong, IcedLong>>() {
    @Override
    public int compare(Map.Entry<IcedLong, IcedLong> o1, Map.Entry<IcedLong, IcedLong> o2) {
      return Long.compare(o2.getValue()._val, o1.getValue()._val);
    }
  });
  // LinkedHashMap iterates in insertion order, which preserves the sorted order.
  Map<Long, Long> sortedMap = new LinkedHashMap<>();
  for (Map.Entry<IcedLong, IcedLong> entry : list) {
    sortedMap.put(entry.getKey()._val, entry.getValue()._val);
  }
  return sortedMap;
}
use of water.util.IcedLong in project h2o-3 by h2oai.
the class WordCountTaskTest method testWordCountText8.
/**
 * Counts the words of the text8 corpus with {@code WordCountTask} and checks the number of
 * distinct words plus the counts of two known words. Skipped when the data file is absent.
 */
@Test
public void testWordCountText8() {
  String fName = "bigdata/laptop/text8.gz";
  // only run if text8 is present
  assumeThat("text8 data available", locateFile(fName), is(notNullValue()));
  Frame fr = parse_test_file(fName, "NA", 0, new byte[] { Vec.T_STR });
  try {
    Map<BufferedString, IcedLong> counts = new WordCountTask().doAll(fr.vec(0))._counts;
    // Check for null before the map is used; after the first dereference the check is moot.
    assertNotNull(counts);
    assertEquals(253854, counts.size());
    assertEquals(303L, counts.get(new BufferedString("anarchism"))._val);
    assertEquals(316376L, counts.get(new BufferedString("to"))._val);
  } finally {
    // Always release the parsed frame, even when an assertion fails.
    fr.remove();
  }
}
Aggregations