Search in sources :

Example 1 with IcedLong

use of water.util.IcedLong in project h2o-3 by h2oai.

the class CreateInteractions method makeDomain.

/**
 * Creates the combined domain for the pair-wise interaction of domain A and domain B.
 *
 * Every key of {@code unsortedMap} packs two categorical integers into one long
 * (upper 32 bits index {@code dA}, lower 32 bits index {@code dB}); the matching value
 * is the occurrence count of that pair. The map is first ordered by {@code mySort}
 * and stored in {@code _sortedMap}. Entries are then taken in that order until either
 * {@code _ci._max_factors} names have been produced or an entry's count drops below
 * {@code _ci._min_occurrence}.
 *
 * If not every entry was taken, {@code _other} is appended as a final catch-all level
 * and {@code _sortedMap} is replaced by a map holding only the taken entries.
 *
 * @param unsortedMap packed (a,b) pair -> occurrence count
 * @param dA          level names of the first factor
 * @param dB          level names of the second factor; when it is the very same array
 *                    instance as {@code dA}, only the B part is used for the name
 * @return the level names of the interaction feature
 */
public String[] makeDomain(Map<IcedLong, IcedLong> unsortedMap, String[] dA, String[] dB) {
    _sortedMap = mySort(unsortedMap);
    final int total = _sortedMap.size();
    //TODO: use ArrayList here, then convert to array
    String[] domain = new String[total];
    long kept = 0;
    int n = 0;
    for (Object o : _sortedMap.entrySet()) {
        final Map.Entry entry = (Map.Entry) o;
        final long packed = (Long) entry.getKey();
        final long occurrences = (Long) entry.getValue();
        // Stop at the first entry that exceeds the factor budget or is too rare.
        if (kept >= _ci._max_factors || occurrences < _ci._min_occurrence)
            break;
        kept++;
        // Unpack the two original categorical levels and build "<A>_<B>" (or just "<B>").
        final StringBuilder name = new StringBuilder();
        if (dA != dB) {
            final int levelA = (int) (packed >> 32);
            name.append(levelA == _missing ? "NA" : dA[levelA]).append("_");
        }
        final int levelB = (int) packed;
        name.append(levelB == _missing ? "NA" : dB[levelB]);
        domain[n++] = name.toString();
    }
    if (n < total) {
        // Some entries were dropped: shrink the domain and add the catch-all level.
        final String[] truncated = new String[n + 1];
        System.arraycopy(domain, 0, truncated, 0, n);
        truncated[n] = _other;
        domain = truncated;
        // Keep only the first n (i.e. retained) entries, in the same order.
        Map tm = new LinkedHashMap<>();
        final Iterator head = _sortedMap.entrySet().iterator();
        for (int i = 0; i < n; i++) {
            final Map.Entry kv = (Map.Entry) head.next();
            tm.put(kv.getKey(), kv.getValue());
        }
        _sortedMap = tm;
    }
    return domain;
}
Also used : IcedLong(water.util.IcedLong) IcedHashMap(water.util.IcedHashMap)

Example 2 with IcedLong

use of water.util.IcedLong in project h2o-3 by h2oai.

the class WordCountTask method read_impl.

/**
 * Rebuilds {@code _counts} from the given buffer.
 *
 * Repeatedly reads a length via {@code get2()}, that many key bytes via {@code getA1(len)},
 * and a count via {@code get8()}, until the length value 65535 (the end-of-map marker)
 * is encountered.
 *
 * @param ab buffer to read the serialized map from
 * @return this task, with {@code _counts} replaced by the freshly read map
 */
public final WordCountTask read_impl(AutoBuffer ab) {
    _counts = new IcedHashMap<>();
    int len;
    while ((len = ab.get2()) != 65535) {
        // Read until end-of-map marker
        byte[] bs = ab.getA1(len);
        long cnt = ab.get8();
        // Decode with an explicit charset so the result does not depend on the JVM's
        // platform-default encoding.
        // NOTE(review): assumes the key bytes were written as UTF-8 - confirm against the writer.
        String word = new String(bs, java.nio.charset.StandardCharsets.UTF_8);
        _counts.put(new BufferedString(word), new IcedLong(cnt));
    }
    return this;
}
Also used : IcedLong(water.util.IcedLong) BufferedString(water.parser.BufferedString) BufferedString(water.parser.BufferedString)

Example 3 with IcedLong

use of water.util.IcedLong in project h2o-3 by h2oai.

the class IcedHasMapGenericTest method testSerialization.

/**
 * Round-trips an {@code IcedHashMapGeneric} holding a mix of key and value types through
 * an {@code AutoBuffer} and checks that the deserialized map has the same size and that
 * every original entry can be looked up with an equal value.
 */
@Test
public void testSerialization() {
    IcedHashMapGeneric m = new IcedHashMapGeneric();
    // String -> String pair
    m.put("haha", "gaga");
    // String -> IcedDouble pair
    m.put("str->freezable", new IcedDouble(3.14));
    // String -> Freezable[] pair
    m.put("str->freezable[]", new Freezable[] { new IcedDouble(3.14) });
    // String -> Integer pair
    m.put("str->Integer", 314);
    // BufferedString -> String pair
    m.put(new BufferedString("haha2"), "gaga");
    // BufferedString -> IcedDouble pair
    m.put(new BufferedString("str->freezable2"), new IcedDouble(3.14));
    // BufferedString -> Freezable[] pair
    m.put(new BufferedString("str->freezable[]2"), new Freezable[] { new IcedDouble(3.14) });
    // BufferedString -> Integer pair
    m.put(new BufferedString("str->Integer2"), 314);
    // IcedLong -> Integer pair
    m.put(new IcedLong(1234), 1234);
    byte[] buf = new AutoBuffer().put(m).buf();
    IcedHashMapGeneric m2 = new AutoBuffer(buf).get();
    assertEquals(m.size(), m2.size());
    Set<Map.Entry> entries = m.entrySet();
    for (Map.Entry e : entries) {
        if (e.getValue() instanceof Freezable[]) {
            // Arrays need element-wise comparison. Fail explicitly rather than via the
            // `assert` keyword, which is skipped when the JVM runs without -ea.
            if (!Arrays.deepEquals((Freezable[]) e.getValue(), (Freezable[]) m2.get(e.getKey())))
                throw new AssertionError("Freezable[] value differs after round-trip for key " + e.getKey());
        } else {
            assertEquals(e.getValue(), m2.get(e.getKey()));
        }
    }
}
Also used : IcedHashMapGeneric(water.util.IcedHashMapGeneric) IcedDouble(water.util.IcedDouble) IcedLong(water.util.IcedLong) BufferedString(water.parser.BufferedString) Map(java.util.Map) Test(org.junit.Test)

Example 4 with IcedLong

use of water.util.IcedLong in project h2o-3 by h2oai.

the class CreateInteractions method mySort.

/**
 * Returns the entries of {@code unsortMap}, unwrapped to plain {@code Long}s, in a map
 * whose iteration order is by descending value (occurrence count).
 *
 * @param unsortMap map to order; it is not modified
 * @return a new {@link LinkedHashMap} iterating from the largest value to the smallest;
 *         entries with equal values keep the relative order in which
 *         {@code unsortMap.entrySet()} produced them (the sort is stable)
 */
private static Map<Long, Long> mySort(Map<IcedLong, IcedLong> unsortMap) {
    List<Map.Entry<IcedLong, IcedLong>> list = new LinkedList<>(unsortMap.entrySet());
    // Sort the entries by value, largest first
    Collections.sort(list, new Comparator<Map.Entry<IcedLong, IcedLong>>() {

        @Override
        public int compare(Map.Entry<IcedLong, IcedLong> o1, Map.Entry<IcedLong, IcedLong> o2) {
            // Arguments are swapped to get descending order; Long.compare avoids boxing.
            return Long.compare(o2.getValue()._val, o1.getValue()._val);
        }
    });
    // A LinkedHashMap iterates in insertion order, so it preserves the sorted order
    Map<Long, Long> sortedMap = new LinkedHashMap<>();
    for (Map.Entry<IcedLong, IcedLong> entry : list) {
        sortedMap.put(entry.getKey()._val, entry.getValue()._val);
    }
    return sortedMap;
}
Also used : IcedLong(water.util.IcedLong) IcedHashMap(water.util.IcedHashMap)

Example 5 with IcedLong

use of water.util.IcedLong in project h2o-3 by h2oai.

the class WordCountTaskTest method testWordCountText8.

/**
 * Runs {@code WordCountTask} over the text8 data set and checks the number of distinct
 * words plus the counts of two specific words. Skipped when the data file is not found.
 */
@Test
public void testWordCountText8() {
    String fName = "bigdata/laptop/text8.gz";
    // only run if text8 is present
    assumeThat("text8 data available", locateFile(fName), is(notNullValue()));
    Frame fr = parse_test_file(fName, "NA", 0, new byte[] { Vec.T_STR });
    try {
        Map<BufferedString, IcedLong> counts = new WordCountTask().doAll(fr.vec(0))._counts;
        // Check for null before the map is dereferenced, so a missing result is reported
        // as an assertion failure instead of a NullPointerException.
        assertNotNull(counts);
        assertEquals(253854, counts.size());
        assertEquals(303L, counts.get(new BufferedString("anarchism"))._val);
        assertEquals(316376L, counts.get(new BufferedString("to"))._val);
    } finally {
        // Always release the parsed frame, even when an assertion fails
        fr.remove();
    }
}
Also used : Frame(water.fvec.Frame) IcedLong(water.util.IcedLong) BufferedString(water.parser.BufferedString) BufferedString(water.parser.BufferedString) Test(org.junit.Test)

Aggregations

IcedLong (water.util.IcedLong)8 BufferedString (water.parser.BufferedString)5 Test (org.junit.Test)3 Frame (water.fvec.Frame)2 IcedHashMap (water.util.IcedHashMap)2 Map (java.util.Map)1 TestFrameBuilder (water.fvec.TestFrameBuilder)1 IcedDouble (water.util.IcedDouble)1 IcedHashMapGeneric (water.util.IcedHashMapGeneric)1