Search in sources :

Example 1 with Example

use of com.airbnb.aerosolve.core.Example in project aerosolve by airbnb.

the class ModelScorerTest method rawProbability.

/**
 * Builds a single-feature-vector {@code Example} from the shared test data, checks that the
 * float family "F" and the string family "S" contain the expected entries, and then scores the
 * example with the "income.model" spline model.
 *
 * <p>The resulting score is only logged; this test does not assert a specific value for it.
 *
 * @throws Exception propagated from any of the calls made below
 */
@Test
public void rawProbability() throws Exception {
    ModelConfig incomeModel = ModelConfig.builder()
            .modelName("income.model")
            .configName("income_prediction.conf")
            .key("spline_model")
            .build();
    ModelScorer modelScorer = new ModelScorer(incomeModel);

    // Register the three data names, then generate features for the matching data.
    FeatureMapping featureMapping = new FeatureMapping();
    featureMapping.add(dataName1);
    featureMapping.add(dataName2);
    featureMapping.add(dataName3);
    featureMapping.finish();
    FeatureGen f = new FeatureGen(featureMapping);
    f.add(data1, dataName1);
    f.add(data2, dataName2);
    f.add(data3, dataName3);
    Features features = f.gen();

    // One string family ("S") and one float family ("F") are requested for the conversion.
    List<StringFamily> stringFamilies = new ArrayList<>();
    stringFamilies.add(new StringFamily("S"));
    List<FloatFamily> floatFamilies = new ArrayList<>();
    floatFamilies.add(new FloatFamily("F"));
    Example example = FeatureVectorGen.toSingleFeatureVectorExample(features, stringFamilies, floatFamilies);
    FeatureVector featureVector = example.getExample().get(0);

    // JUnit's signature is assertEquals(expected, actual, delta): the expected constant goes
    // first so that a failure message reports the values the right way round.
    final Map<String, Map<String, Double>> floatFeatures = featureVector.getFloatFeatures();
    Map<String, Double> floatFeatureFamily = floatFeatures.get("F");
    assertEquals(30.0, floatFeatureFamily.get("age"), 0.1);
    assertEquals(40.0, floatFeatureFamily.get("hours"), 0.1);

    // The string family must contain the value "married" but not the raw name "marital-status".
    final Map<String, Set<String>> stringFeatures = featureVector.getStringFeatures();
    Set<String> stringFeatureFamily = stringFeatures.get("S");
    assertFalse(stringFeatureFamily.contains("marital-status"));
    assertTrue(stringFeatureFamily.contains("married"));

    double score = modelScorer.score(example);
    log.info("score {}", score);
}
Also used : FeatureVector(com.airbnb.aerosolve.core.FeatureVector) Set(java.util.Set) ArrayList(java.util.ArrayList) Example(com.airbnb.aerosolve.core.Example) Map(java.util.Map) Test(org.junit.Test)

Example 2 with Example

use of com.airbnb.aerosolve.core.Example in project aerosolve by airbnb.

the class ModelTransformsTest method testTransform.

/**
 * Applies the "model_transforms" transformer to the example returned by {@code makeExample()}
 * and verifies the string, float and dense features of the first feature vector in the result.
 */
@Test
public void testTransform() {
    Transformer modelTransformer = new Transformer(ConfigFactory.parseString(makeConfig()), "model_transforms");
    Example example = makeExample();
    modelTransformer.combineContextAndItems(example);

    // The example is expected to hold two feature vectors; only the first one is inspected.
    assertTrue(example.example.size() == 2);
    FeatureVector first = example.example.get(0);
    assertTrue(first.stringFeatures.size() == 3);

    // String families: quantized guest location, quantized host location, and their cross.
    for (String token : new String[] {"lat=10", "long=20"}) {
        assertTrue(first.stringFeatures.get("guest_loc_quantized").contains(token));
    }
    for (String token : new String[] {"lat=31", "long=42"}) {
        assertTrue(first.stringFeatures.get("host_loc_quantized").contains(token));
    }
    for (String crossed : new String[] {"lat=10^lat=31", "long=20^lat=31", "lat=10^long=42", "long=20^long=42"}) {
        assertTrue(first.stringFeatures.get("gxh_loc").contains(crossed));
    }

    // Float families keep the raw guest/host coordinates.
    assertTrue(first.floatFeatures.get("guest_loc").get("lat") == 1.0);
    assertTrue(first.floatFeatures.get("guest_loc").get("long") == 2.0);
    assertTrue(first.floatFeatures.get("host_loc").get("lat") == 3.1);
    assertTrue(first.floatFeatures.get("host_loc").get("long") == 4.2);

    // Dense families contain the same coordinate values.
    assertTrue(first.denseFeatures.get("guest_loc_dense").contains(1.0));
    assertTrue(first.denseFeatures.get("guest_loc_dense").contains(2.0));
    assertTrue(first.denseFeatures.get("host_loc_dense").contains(3.1));
    assertTrue(first.denseFeatures.get("host_loc_dense").contains(4.2));

    log.info(example.toString());
}
Also used : FeatureVector(com.airbnb.aerosolve.core.FeatureVector) Config(com.typesafe.config.Config) Example(com.airbnb.aerosolve.core.Example) Test(org.junit.Test)

Example 3 with Example

use of com.airbnb.aerosolve.core.Example in project aerosolve by airbnb.

the class UtilTest method testEncodeDecodeExample.

/**
 * Encodes an {@code Example} holding one feature vector with {@code Util.encode}, decodes the
 * resulting string with {@code Util.decodeExample}, and checks that the decoded example holds
 * exactly one feature vector.
 */
@Test
public void testEncodeDecodeExample() {
    FeatureVector vector = makeFeatureVector();
    Example original = new Example();
    original.addToExample(vector);

    // The encoded form must be a non-empty string.
    String encoded = Util.encode(original);
    assertTrue(!encoded.isEmpty());
    log.info(encoded);

    Example decoded = Util.decodeExample(encoded);
    assertTrue(decoded.example.size() == 1);
}
Also used : FeatureVector(com.airbnb.aerosolve.core.FeatureVector) Example(com.airbnb.aerosolve.core.Example) Test(org.junit.Test)

Example 4 with Example

use of com.airbnb.aerosolve.core.Example in project aerosolve by airbnb.

the class Util method flattenFeatureWithDropoutAsStream.

/**
 * Flattens the string and float features of a feature vector into a nested stream: the outer
 * stream yields (feature family, inner stream) entries and each inner stream yields
 * (feature name, feature value) entries. String features are emitted with the value 1.0.
 *
 * <p>When {@code dropout} is greater than zero, every inner entry is kept only if a draw from a
 * {@link Random} seeded with {@code seed} is greater than or equal to {@code dropout}.
 *
 * @apiNote A Stream can be consumed only once, just like an iterator. Pass a fixed seed when a
 * reproducible dropout result is required.
 *
 * @param featureVector the feature vector to flatten; its string and float feature maps may be null
 * @param dropout dropout threshold; dropout is applied only when this is greater than zero
 * @param seed seed for the random generator that drives dropout
 * @return the string feature families followed by the float feature families
 */
public static Stream<? extends Map.Entry<String, Stream<? extends Map.Entry<String, Double>>>> flattenFeatureWithDropoutAsStream(FeatureVector featureVector, double dropout, long seed) {
    // String features: each feature name becomes a (name, 1.0) entry within its family.
    Stream<? extends Map.Entry<String, Stream<? extends Map.Entry<String, Double>>>> fromStrings = Stream.empty();
    if (featureVector.stringFeatures != null) {
        fromStrings = featureVector.stringFeatures.entrySet().stream().map(family -> {
            Stream<? extends Map.Entry<String, Double>> indicators = family.getValue().stream().map(name -> new HashMap.SimpleImmutableEntry<>(name, 1.0));
            return new HashMap.SimpleImmutableEntry<>(family.getKey(), indicators);
        });
    }

    // Float features: the (name, value) entries of each family are streamed as they are.
    Stream<? extends Map.Entry<String, Stream<? extends Map.Entry<String, Double>>>> fromFloats = Stream.empty();
    if (featureVector.floatFeatures != null) {
        fromFloats = featureVector.floatFeatures.entrySet().stream().map(family -> new HashMap.SimpleImmutableEntry<>(family.getKey(), family.getValue().entrySet().stream()));
    }

    // String families come first, float families second.
    Stream<? extends Map.Entry<String, Stream<? extends Map.Entry<String, Double>>>> combined = Stream.concat(fromStrings, fromFloats);

    // No dropout requested: hand the combined stream back untouched.
    if (!(dropout > 0)) {
        return combined;
    }

    // Dropout has to be applied on the innermost stream, i.e. per individual feature.
    Random random = new Random(seed);
    return combined.map(family -> new HashMap.SimpleImmutableEntry<>(family.getKey(), family.getValue().filter(feature -> random.nextDouble() >= dropout)));
}
Also used : Arrays(java.util.Arrays) GZIPInputStream(java.util.zip.GZIPInputStream) ModelRecord(com.airbnb.aerosolve.core.ModelRecord) HashMap(java.util.HashMap) Random(java.util.Random) Hashing(com.google.common.hash.Hashing) TSerializer(org.apache.thrift.TSerializer) Base64(org.apache.commons.codec.binary.Base64) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TDeserializer(org.apache.thrift.TDeserializer) FeatureVector(com.airbnb.aerosolve.core.FeatureVector) Map(java.util.Map) DebugScoreRecord(com.airbnb.aerosolve.core.DebugScoreRecord) Hasher(com.google.common.hash.Hasher) TBase(org.apache.thrift.TBase) HashCode(com.google.common.hash.HashCode) Set(java.util.Set) IOException(java.io.IOException) KDTreeNode(com.airbnb.aerosolve.core.KDTreeNode) DebugScoreDiffRecord(com.airbnb.aerosolve.core.DebugScoreDiffRecord) InputStreamReader(java.io.InputStreamReader) Serializable(java.io.Serializable) Example(com.airbnb.aerosolve.core.Example) Slf4j(lombok.extern.slf4j.Slf4j) List(java.util.List) Stream(java.util.stream.Stream) BufferedReader(java.io.BufferedReader) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) Random(java.util.Random) HashMap(java.util.HashMap) GZIPInputStream(java.util.zip.GZIPInputStream) Stream(java.util.stream.Stream) InputStream(java.io.InputStream) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with Example

use of com.airbnb.aerosolve.core.Example in project aerosolve by airbnb.

the class Util method createNewExample.

/**
 * Creates an {@code Example} whose context is the feature vector returned by
 * {@code createNewFeatureVector()} and whose example list is a new, empty list.
 *
 * @return the newly created example
 */
public static Example createNewExample() {
    Example result = new Example();
    result.setContext(createNewFeatureVector());
    result.setExample(new ArrayList<>());
    return result;
}
Also used : FeatureVector(com.airbnb.aerosolve.core.FeatureVector) Example(com.airbnb.aerosolve.core.Example)

Aggregations

Example (com.airbnb.aerosolve.core.Example)11 FeatureVector (com.airbnb.aerosolve.core.FeatureVector)9 Test (org.junit.Test)5 Map (java.util.Map)4 HashMap (java.util.HashMap)3 Set (java.util.Set)3 ArrayList (java.util.ArrayList)2 TDeserializer (org.apache.thrift.TDeserializer)2 DebugScoreDiffRecord (com.airbnb.aerosolve.core.DebugScoreDiffRecord)1 DebugScoreRecord (com.airbnb.aerosolve.core.DebugScoreRecord)1 KDTreeNode (com.airbnb.aerosolve.core.KDTreeNode)1 ModelRecord (com.airbnb.aerosolve.core.ModelRecord)1 HashCode (com.google.common.hash.HashCode)1 Hasher (com.google.common.hash.Hasher)1 Hashing (com.google.common.hash.Hashing)1 Config (com.typesafe.config.Config)1 BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1