Search in sources :

Example 1 with TestPerson

use of com.twitter.elephantbird.thrift.test.TestPerson in project elephant-bird by twitter.

the class TestErrorsInInput method TestErrorTolerance.

/**
 * Checks the configurable bad-record tolerance when loading through {@code ThriftPigLoader}.
 *
 * <p>Writes 100 serialized copies of one {@code TestPerson} into a single LZO block file,
 * truncating one record out of every ten to three quarters of its length. The file is then
 * read three times: without touching any tolerance setting (an {@code IOException} is
 * expected), with the bad-record threshold set to 0.5, and with a threshold of 0.0001
 * combined with a minimum bad-record count one above the number of truncated records.
 *
 * @throws Exception if writing the input or running a Pig query fails unexpectedly
 */
@Test
public void TestErrorTolerance() throws Exception {
    // The whole test is skipped when no Pig server has been set up.
    Assume.assumeTrue(pigServer != null);

    // Input directory for this test.
    final String baseDir = System.getProperty("test.build.data") + "/TestErrorTolerance";
    final File inDir = new File(baseDir, "in");
    inDir.mkdirs();

    // Build the input: 100 records, 10% of them corrupted by truncation.
    final File blockFile = new File(inDir, "1-block.lzo");
    RawBlockWriter writer = new RawBlockWriter(createLzoOut(blockFile, conf));
    TestPerson person = records[records.length - 1];
    String expectedStr = personToString(person);
    byte[] intactBytes = tConverter.toBytes(person);
    byte[] truncatedBytes = Arrays.copyOfRange(intactBytes, 0, intactBytes.length * 3 / 4);

    final int totalRecords = 100;
    final int pctErrors = 10;
    final int totalErrors = totalRecords * pctErrors / 100;
    final int goodRecords = totalRecords - totalErrors;

    // One randomly chosen slot in every run of ten records receives the truncated bytes.
    final int corruptSlot = new Random().nextInt(10);
    for (int n = 0; n < totalRecords; n++) {
        if (n % 10 == corruptSlot) {
            writer.write(truncatedBytes);
        } else {
            writer.write(intactBytes);
        }
    }
    writer.close();

    // Every surviving row is the same person, so the expectation is one string repeated.
    final String[] expectedRows = new String[goodRecords];
    Arrays.fill(expectedRows, expectedStr);

    // A = load 'in' using ThriftPigLoader('TestPerson');
    final String loadStmt = String.format(
            "A = load '%s' using %s('%s');\n",
            inDir.toURI().toString(),
            ThriftPigLoader.class.getName(),
            TestPerson.class.getName());

    // 1) A plain load, with no tolerance configured here, should fail.
    pigServer.registerQuery(loadStmt);
    try {
        verifyRows(expectedRows, pigServer.openIterator("A"));
        Assert.fail("A Pig IOException was expected");
    } catch (IOException expected) {
        // expected.
    }

    // 2) With the error-rate threshold raised to 50% the load should succeed.
    pigServer.getPigContext().getProperties()
            .setProperty(LzoRecordReader.BAD_RECORD_THRESHOLD_CONF_KEY, "0.5");
    pigServer.registerQuery(loadStmt);
    verifyRows(expectedRows, pigServer.openIterator("A"));

    // 3) With a very low threshold, check that the minimum error count setting works.
    pigServer.getPigContext().getProperties()
            .setProperty(LzoRecordReader.BAD_RECORD_THRESHOLD_CONF_KEY, "0.0001");
    pigServer.getPigContext().getProperties()
            .setProperty(LzoRecordReader.BAD_RECORD_MIN_COUNT_CONF_KEY, String.valueOf(totalErrors + 1));
    verifyRows(expectedRows, pigServer.openIterator("A"));
}
Also used : Random(java.util.Random) RawBlockWriter(com.twitter.elephantbird.mapreduce.io.RawBlockWriter) IOException(java.io.IOException) TestPerson(com.twitter.elephantbird.thrift.test.TestPerson) File(java.io.File) Test(org.junit.Test)

Example 2 with TestPerson

use of com.twitter.elephantbird.thrift.test.TestPerson in project elephant-bird by twitter.

the class TestThriftMultiFormatLoader method setUp.

/**
 * Prepares the input files used by the tests in this class.
 *
 * <p>The setup is skipped unless {@code CoreTestUtil.okToRunLzoTests} approves the
 * configuration. Otherwise a Pig server is created and every element of {@code records} is
 * written twice: through a {@code ThriftBlockWriter} into {@code 1-block.lzo} and through a
 * {@code LzoBinaryB64LineRecordWriter} into {@code 2-b64.lzo}.
 *
 * @throws Exception if the Pig server cannot be created or writing an input file fails
 */
@Before
public void setUp() throws Exception {
    final Configuration hadoopConf = new Configuration();
    Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(hadoopConf));

    pigServer = PigTestUtil.makePigServer();
    inputDir.mkdirs();

    // First input: all records in block format.
    final ThriftBlockWriter<TestPerson> blockWriter =
            new ThriftBlockWriter<TestPerson>(createLzoOut("1-block.lzo", hadoopConf), TestPerson.class);
    for (TestPerson person : records) {
        blockWriter.write(person);
    }
    blockWriter.close();

    // Second input: the same records as base64-encoded lines.
    final LzoBinaryB64LineRecordWriter<TestPerson, ThriftWritable<TestPerson>> lineWriter =
            LzoBinaryB64LineRecordWriter.newThriftWriter(TestPerson.class, createLzoOut("2-b64.lzo", hadoopConf));
    for (TestPerson person : records) {
        // The shared writable is re-pointed at each record; null is passed as the key.
        thriftWritable.set(person);
        lineWriter.write(null, thriftWritable);
    }
    lineWriter.close(null);
}
Also used : ThriftWritable(com.twitter.elephantbird.mapreduce.io.ThriftWritable) Configuration(org.apache.hadoop.conf.Configuration) TestPerson(com.twitter.elephantbird.thrift.test.TestPerson) ThriftBlockWriter(com.twitter.elephantbird.mapreduce.io.ThriftBlockWriter) Before(org.junit.Before)

Example 3 with TestPerson

use of com.twitter.elephantbird.thrift.test.TestPerson in project elephant-bird by twitter.

the class TestThriftToPig method tupleTest.

/**
 * Converts a series of Thrift fixtures to Pig tuples with {@code toTuple(type, ...)} and
 * compares the delimited string form of each tuple with its expected rendering.
 *
 * <p>Covered in order: a flat struct, a nested struct, a struct of containers, structs with
 * null fields, an enum field, an enum-keyed map and a union.
 *
 * @param type passed through unchanged to every {@code toTuple} call
 * @throws Exception if a conversion or (de)serialization step fails
 */
private void tupleTest(TestType type) throws Exception {
    final OneOfEach ooe = Fixtures.oneOfEach;
    final Nesting nesting = Fixtures.nesting;
    final ThriftConverter<HolyMoley> holyMoleyConverter = ThriftConverter.newInstance(HolyMoley.class);
    // Work on a deserialized copy so that iteration over the HashSet contained in the
    // HolyMoley object is a bit more predictable.
    final HolyMoley holyMoley = holyMoleyConverter.fromBytes(holyMoleyConverter.toBytes(Fixtures.holyMoley));

    // Flat struct.
    String expectedOoe = "1-0-35-27000-16777216-6000000000-3.141592653589793-JSON THIS! \"-"
            + ooe.zomg_unicode
            + "-0-base64-{(1),(2),(3)}-{(1),(2),(3)}-{(1),(2),(3)}";
    assertEquals(expectedOoe, toTuple(type, ooe).toDelimitedString("-"));

    // Struct nested inside another struct.
    String expectedNesting = "(31337,I am a bonk... xor!)-(1,0,35,27000,16777216,6000000000,3.141592653589793,JSON THIS! \","
            + nesting.my_ooe.zomg_unicode
            + ",0,base64,{(1),(2),(3)},{(1),(2),(3)},{(1),(2),(3)})";
    assertEquals(expectedNesting, toTuple(type, nesting).toDelimitedString("-"));

    // Struct made of containers.
    String expectedHolyMoley = "{(1,0,34,27000,16777216,6000000000,3.141592653589793,JSON THIS! \","
            + ooe.zomg_unicode
            + ",0,base64,{(1),(2),(3)},{(1),(2),(3)},{(1),(2),(3)}),(1,0,35,27000,16777216,6000000000,3.141592653589793,JSON THIS! \","
            + ooe.zomg_unicode
            + ",0,base64,{(1),(2),(3)},{(1),(2),(3)},{(1),(2),(3)})}-{({}),({(and a one),(and a two)}),({(then a one, two),(three!),(FOUR!!)})}-{zero={}, three={}, two={(1,Wait.),(2,What?)}}";
    assertEquals(expectedHolyMoley, toTuple(type, holyMoley).toDelimitedString("-"));

    // Null fields. Only fields whose default is null are cleared, so that an extra round
    // of serialization and deserialization does not affect the outcome.
    final OneOfEach sparseOoe = new OneOfEach(ooe);
    sparseOoe.setBase64((ByteBuffer) null);
    sparseOoe.setZomg_unicode(null);
    String expectedSparseOoe =
            "1-0-35-27000-16777216-6000000000-3.141592653589793-JSON THIS! \"--0--{(1),(2),(3)}-{(1),(2),(3)}-{(1),(2),(3)}";
    assertEquals(expectedSparseOoe, toTuple(type, sparseOoe).toDelimitedString("-"));

    final Nesting sparseNesting = new Nesting(nesting);
    sparseNesting.getMy_bonk().setMessage(null);
    sparseNesting.setMy_ooe(sparseOoe);
    String expectedSparseNesting =
            "(31337,)-(1,0,35,27000,16777216,6000000000,3.141592653589793,JSON THIS! \",,0,,{(1),(2),(3)},{(1),(2),(3)},{(1),(2),(3)})";
    assertEquals(expectedSparseNesting, toTuple(type, sparseNesting).toDelimitedString("-"));

    // Enum field.
    ThriftToPig.setConversionProperties(new Configuration(false));
    final PhoneNumber phone = new PhoneNumber();
    phone.setNumber("415-555-5555");
    phone.setType(PhoneType.HOME);
    assertEquals("415-555-5555,HOME", toTuple(type, phone).toDelimitedString(","));

    final Person person = new Person(new Name("bob", "jenkins"), 42, "foo@bar.com", Lists.newArrayList(phone));
    assertEquals("(bob,jenkins),42,foo@bar.com,{(415-555-5555,HOME)}", toTuple(type, person).toDelimitedString(","));

    // Enum-keyed map. Two orderings of the rendered map entries are accepted.
    final TestPerson testPerson = new TestPerson(
            new TestName("bob", "jenkins"),
            ImmutableMap.of(
                    TestPhoneType.HOME, "408-555-5555",
                    TestPhoneType.MOBILE, "650-555-5555",
                    TestPhoneType.WORK, "415-555-5555"));
    final String tupleString = toTuple(type, testPerson).toDelimitedString("-");
    final boolean workBeforeHome =
            tupleString.equals("(bob,jenkins)-{MOBILE=650-555-5555, WORK=415-555-5555, HOME=408-555-5555}");
    final boolean homeBeforeWork =
            tupleString.equals("(bob,jenkins)-{MOBILE=650-555-5555, HOME=408-555-5555, WORK=415-555-5555}");
    assertTrue(workBeforeHome || homeBeforeWork);

    // Union with the i32 member set.
    final TestUnion intUnion = new TestUnion();
    intUnion.setI32Type(10);
    assertEquals(",10,,,", toTuple(type, intUnion).toDelimitedString(","));

    // Union where the i32 value set first is overridden by the string set afterwards.
    final TestUnion stringUnion = new TestUnion();
    stringUnion.setI32Type(-1);
    stringUnion.setStringType("abcd");
    assertEquals("abcd,,,,", toTuple(type, stringUnion).toDelimitedString(","));
}
Also used : Nesting(thrift.test.Nesting) TestName(com.twitter.elephantbird.thrift.test.TestName) HolyMoley(thrift.test.HolyMoley) Configuration(org.apache.hadoop.conf.Configuration) TestUnion(com.twitter.elephantbird.thrift.test.TestUnion) PhoneNumber(com.twitter.elephantbird.thrift.test.PhoneNumber) TestPerson(com.twitter.elephantbird.thrift.test.TestPerson) Person(com.twitter.elephantbird.thrift.test.Person) OneOfEach(thrift.test.OneOfEach) Name(com.twitter.elephantbird.thrift.test.Name)

Example 4 with TestPerson

use of com.twitter.elephantbird.thrift.test.TestPerson in project parquet-mr by apache.

the class TestParquetReadProtocol method testStructInMap.

/**
 * Builds a {@code TestStructInMap} that carries a map with a struct value plus a
 * string-to-integer map, and hands it to {@code validate}.
 *
 * @throws Exception if validation fails with an exception
 */
@Test
public void testStructInMap() throws Exception {
    // A person with a name and an empty phone map is the single struct value.
    final TestPerson john = new TestPerson(new TestName("john", "johnson"), new HashMap<TestPhoneType, String>());
    final Map<String, TestPerson> peopleByKey = new HashMap<String, TestPerson>();
    peopleByKey.put("foo", john);

    final Map<String, Integer> numbersByKey = Collections.singletonMap("bar", 10);

    validate(new TestStructInMap("map_name", peopleByKey, numbersByKey));
}
Also used : TestName(com.twitter.elephantbird.thrift.test.TestName) HashMap(java.util.HashMap) TestPerson(com.twitter.elephantbird.thrift.test.TestPerson) TestStructInMap(com.twitter.elephantbird.thrift.test.TestStructInMap) Test(org.junit.Test)

Example 5 with TestPerson

use of com.twitter.elephantbird.thrift.test.TestPerson in project parquet-mr by apache.

the class TestThriftToPigCompatibility method testStructInMap.

/**
 * Builds a {@code TestStructInMap} that carries a map with a struct value plus a
 * string-to-integer map, and hands it to {@code validateSameTupleAsEB}.
 *
 * @throws Exception if the comparison fails with an exception
 */
@Test
public void testStructInMap() throws Exception {
    // A person with a name and an empty phone map is the single struct value.
    final TestPerson john = new TestPerson(new TestName("john", "johnson"), new HashMap<TestPhoneType, String>());
    final Map<String, TestPerson> peopleByKey = new HashMap<String, TestPerson>();
    peopleByKey.put("foo", john);

    final Map<String, Integer> numbersByKey = Collections.singletonMap("bar", 10);

    validateSameTupleAsEB(new TestStructInMap("map_name", peopleByKey, numbersByKey));
}
Also used : TestName(com.twitter.elephantbird.thrift.test.TestName) HashMap(java.util.HashMap) TestPerson(com.twitter.elephantbird.thrift.test.TestPerson) TestStructInMap(com.twitter.elephantbird.thrift.test.TestStructInMap) Test(org.junit.Test)

Aggregations

TestPerson (com.twitter.elephantbird.thrift.test.TestPerson)8 Test (org.junit.Test)6 TestName (com.twitter.elephantbird.thrift.test.TestName)4 TestStructInMap (com.twitter.elephantbird.thrift.test.TestStructInMap)3 HashMap (java.util.HashMap)3 RawBlockWriter (com.twitter.elephantbird.mapreduce.io.RawBlockWriter)2 File (java.io.File)2 Configuration (org.apache.hadoop.conf.Configuration)2 Tuple (org.apache.pig.data.Tuple)2 ThriftBlockWriter (com.twitter.elephantbird.mapreduce.io.ThriftBlockWriter)1 ThriftWritable (com.twitter.elephantbird.mapreduce.io.ThriftWritable)1 Name (com.twitter.elephantbird.thrift.test.Name)1 Person (com.twitter.elephantbird.thrift.test.Person)1 PhoneNumber (com.twitter.elephantbird.thrift.test.PhoneNumber)1 TestUnion (com.twitter.elephantbird.thrift.test.TestUnion)1 DataOutputStream (java.io.DataOutputStream)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 Random (java.util.Random)1