Search in sources:

Example 86 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

The class TestPigHBaseStorageHandler defines the method testPigHBaseSchema.

@Test
public void testPigHBaseSchema() throws Exception {
    Initialize();
    String tableName = newTableName("MyTable");
    String databaseName = newTableName("MyDatabase");
    // Table name will be lower case unless specified by hbase.table.name property
    String hbaseTableName = "testTable";
    String db_dir = HCatUtil.makePathASafeFileName(getTestDir() + "/hbasedb");
    String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + db_dir + "'";
    String deleteQuery = "DROP TABLE " + databaseName + "." + tableName;
    String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + "(key float, testqualifier1 string, testqualifier2 int) STORED BY " + "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')" + " TBLPROPERTIES ('hbase.table.name'='" + hbaseTableName + "')";
    driver.run(deleteQuery);
    driver.run(dbQuery);
    driver.run(tableQuery);
    // Connection and Admin are AutoCloseable; try-with-resources closes them in
    // reverse declaration order (Admin first, then Connection), which is exactly
    // what the previous manual finally block did, but also covers the case where
    // getAdmin() itself throws (the old code would have leaked nothing there, but
    // this form is leak-proof by construction).
    boolean doesTableExist;
    try (Connection connection = ConnectionFactory.createConnection(getHbaseConf());
            Admin hAdmin = connection.getAdmin()) {
        doesTableExist = hAdmin.tableExists(TableName.valueOf(hbaseTableName));
    }
    assertTrue(doesTableExist);
    // Load the table through HCatLoader and verify the Pig-side schema that the
    // HBase storage handler exposes: (float key, chararray, int), with aliases
    // lower-cased by Hive.
    PigServer server = HCatBaseTest.createPigServer(false, hcatConf.getAllProperties());
    server.registerQuery("A = load '" + databaseName + "." + tableName + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedASchema = server.dumpSchema("A");
    List<FieldSchema> fields = dumpedASchema.getFields();
    assertEquals(3, fields.size());
    assertEquals(DataType.FLOAT, fields.get(0).type);
    assertEquals("key", fields.get(0).alias.toLowerCase());
    assertEquals(DataType.CHARARRAY, fields.get(1).type);
    assertEquals("testQualifier1".toLowerCase(), fields.get(1).alias.toLowerCase());
    assertEquals(DataType.INTEGER, fields.get(2).type);
    assertEquals("testQualifier2".toLowerCase(), fields.get(2).alias.toLowerCase());
}
Also used : PigServer(org.apache.pig.PigServer) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) Connection(org.apache.hadoop.hbase.client.Connection) Admin(org.apache.hadoop.hbase.client.Admin) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 87 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

The class TestPigHBaseStorageHandler defines the method testPigFilterProjection.

@Test
public void testPigFilterProjection() throws Exception {
    Initialize();
    String tableName = newTableName("MyTable");
    String databaseName = newTableName("MyDatabase");
    // Table name will be lower case unless specified by hbase.table.name property
    String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
    String db_dir = HCatUtil.makePathASafeFileName(getTestDir() + "/hbasedb");
    String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + db_dir + "'";
    String deleteQuery = "DROP TABLE " + databaseName + "." + tableName;
    String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + "(key int, testqualifier1 string, testqualifier2 string) STORED BY " + "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')" + " TBLPROPERTIES ('hbase.table.default.storage.type'='binary')";
    driver.run(deleteQuery);
    driver.run(dbQuery);
    driver.run(tableQuery);
    // All four handles (Connection, Admin, Table, ResultScanner) are
    // AutoCloseable. Nested try-with-resources closes them in reverse
    // declaration order — scanner, table, admin, connection — identical to the
    // manual finally chain it replaces, and safe if any intermediate call throws.
    try (Connection connection = ConnectionFactory.createConnection(getHbaseConf());
            Admin hAdmin = connection.getAdmin()) {
        assertTrue(hAdmin.tableExists(TableName.valueOf(hbaseTableName)));
        populateHBaseTable(hbaseTableName, connection);
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("testFamily"));
        try (Table table = connection.getTable(TableName.valueOf(hbaseTableName));
                ResultScanner scanner = table.getScanner(scan)) {
            // Scanner is opened only to verify the populated table is scannable;
            // the rows themselves are read back through Pig below.
        }
    }
    int index = 1;
    PigServer server = HCatBaseTest.createPigServer(false, hcatConf.getAllProperties());
    server.registerQuery("A = load '" + databaseName + "." + tableName + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.registerQuery("B = filter A by key < 5;");
    server.registerQuery("C = foreach B generate key,testqualifier2;");
    Iterator<Tuple> itr = server.openIterator("C");
    // verify if the filter is correct and returns 2 rows and contains 2 columns and the contents match
    while (itr.hasNext()) {
        Tuple t = itr.next();
        assertTrue(t.size() == 2);
        assertTrue(t.get(0).getClass() == Integer.class);
        assertEquals(index, t.get(0));
        assertTrue(t.get(1).getClass() == String.class);
        assertEquals("textB-" + index, t.get(1));
        index++;
    }
    // JUnit's assertEquals takes (expected, actual); the filter "key < 5" must
    // have yielded exactly 4 rows. (Arguments were previously reversed.)
    assertEquals(4, index - 1);
}
Also used : Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) PigServer(org.apache.pig.PigServer) Connection(org.apache.hadoop.hbase.client.Connection) Scan(org.apache.hadoop.hbase.client.Scan) Admin(org.apache.hadoop.hbase.client.Admin) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 88 with PigServer

use of org.apache.pig.PigServer in project pigeon by aseldawy.

The class TestMakeLine defines the method testShouldWorkWithWKT.

public void testShouldWorkWithWKT() throws Exception {
    // Input rows: (geometry id, point position within the line, WKT point).
    // Two line strings: id 0 with four points, id 1 with three.
    String[][] rows = {
        { "0", "0", "POINT (0.0 0.0)" },
        { "0", "1", "POINT (0.0 3.0)" },
        { "0", "2", "POINT (4.0 5.0)" },
        { "0", "3", "POINT (10.0 0.0)" },
        { "1", "0", "POINT (5.0 6.0)" },
        { "1", "1", "POINT (10.0 3.0)" },
        { "1", "2", "POINT (7.0 13.0)" }
    };
    ArrayList<String[]> data = new ArrayList<String[]>();
    for (String[] row : rows) {
        data.add(row);
    }
    // Escape backslashes so Windows temp paths survive inside the Pig script.
    String datafile = TestHelper.createTempFile(data, "\t").replace("\\", "\\\\");
    PigServer pigServer = new PigServer(LOCAL);
    String query = "A = LOAD 'file:" + datafile + "' as (geom_id, point_pos, point);\n" + "B = ORDER A BY point_pos;" + "C = GROUP B BY geom_id;" + "D = FOREACH C GENERATE group, " + MakeLine.class.getName() + "(B.point);";
    pigServer.registerQuery(query);
    Iterator<?> actual = pigServer.openIterator("D");
    Vector<String> expectedResult = new Vector<String>();
    expectedResult.add("LINESTRING(0 0, 0 3, 4 5, 10 0)");
    expectedResult.add("LINESTRING(5 6, 10 3, 7  13)");
    Iterator<String> expectedIter = expectedResult.iterator();
    int matched = 0;
    // Walk actual and expected output in lockstep, comparing geometries.
    while (actual.hasNext() && expectedIter.hasNext()) {
        Tuple tuple = (Tuple) actual.next();
        String expectedWkt = expectedIter.next();
        if (tuple == null)
            break;
        TestHelper.assertGeometryEqual(expectedWkt, tuple.get(1));
        matched++;
    }
    // Every expected line must have been produced and matched.
    assertEquals(expectedResult.size(), matched);
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Vector(java.util.Vector) Tuple(org.apache.pig.data.Tuple)

Example 89 with PigServer

use of org.apache.pig.PigServer in project pigeon by aseldawy.

The class TestNumPoints defines the method testShouldWorkWithGeometries.

public void testShouldWorkWithGeometries() throws Exception {
    // One geometry of each kind: line string, polygon, point, and a collection.
    String[][] rows = {
        { "0", "LINESTRING(0 0, 6 0, 0 6, 0 0)" },
        { "1", "POLYGON((3 2, 8 2, 3 7, 3 2))" },
        { "2", "POINT(3 2)" },
        { "3", "GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(2 -2, 9 -2, 9 5, 2 10))" }
    };
    ArrayList<String[]> data = new ArrayList<String[]>();
    for (String[] row : rows) {
        data.add(row);
    }
    // Escape backslashes so Windows temp paths survive inside the Pig script.
    String datafile = TestHelper.createTempFile(data, "\t").replace("\\", "\\\\");
    PigServer pigServer = new PigServer(LOCAL);
    String query = "A = LOAD 'file:" + datafile + "' as (id, geom);\n" + "B = FOREACH A GENERATE " + NumPoints.class.getName() + "(geom);";
    pigServer.registerQuery(query);
    Iterator<?> actual = pigServer.openIterator("B");
    // Expected point counts, one per input row, in input order.
    int[] correct_sizes = { 4, 3, 1, 5 };
    int idx = 0;
    while (actual.hasNext()) {
        Tuple tuple = (Tuple) actual.next();
        if (tuple == null)
            break;
        assertEquals(correct_sizes[idx], (int) (Integer) tuple.get(0));
        idx++;
    }
    // All four rows must have produced a count.
    assertEquals(correct_sizes.length, idx);
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Tuple(org.apache.pig.data.Tuple)

Example 90 with PigServer

use of org.apache.pig.PigServer in project pigeon by aseldawy.

The class TestXMin defines the method testShouldWorkWithPoints.

public void testShouldWorkWithPoints() throws Exception {
    // Single input row: id 1, a point at the origin; XMin of it must be 0.0.
    ArrayList<String[]> data = new ArrayList<String[]>();
    data.add(new String[] { "1", "POINT (0 0)" });
    String datafile = TestHelper.createTempFile(data, "\t");
    // Escape backslashes so Windows temp paths survive inside the Pig script.
    datafile = datafile.replace("\\", "\\\\");
    PigServer pig = new PigServer(LOCAL);
    String query = "A = LOAD 'file:" + datafile + "' as (id, geom);\n" + "B = FOREACH A GENERATE " + XMin.class.getName() + "(geom);";
    pig.registerQuery(query);
    Iterator<?> it = pig.openIterator("B");
    ArrayList<Double> correct_result = new ArrayList<Double>();
    correct_result.add(0.0);
    Iterator<Double> xmins = correct_result.iterator();
    int count = 0;
    while (it.hasNext() && xmins.hasNext()) {
        Tuple tuple = (Tuple) it.next();
        if (tuple == null)
            break;
        Double xmin = (Double) tuple.get(0);
        assertEquals(xmins.next(), xmin);
        count++;
    }
    // Previously missing: without this assertion the test passed vacuously when
    // Pig produced no output rows. Mirrors the count check in the sibling tests.
    assertEquals(correct_result.size(), count);
}
Also used : PigServer(org.apache.pig.PigServer) ArrayList(java.util.ArrayList) Tuple(org.apache.pig.data.Tuple)

Aggregations

PigServer (org.apache.pig.PigServer)115 Tuple (org.apache.pig.data.Tuple)74 ArrayList (java.util.ArrayList)70 Test (org.junit.Test)59 HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)37 Data (org.apache.pig.builtin.mock.Storage.Data)15 File (java.io.File)14 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)14 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)9 Properties (java.util.Properties)8 Vector (java.util.Vector)8 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)6 Path (org.apache.hadoop.fs.Path)4 FileWriter (java.io.FileWriter)3 List (java.util.List)3 Map (java.util.Map)3 Admin (org.apache.hadoop.hbase.client.Admin)3 Connection (org.apache.hadoop.hbase.client.Connection)3 Pair (org.apache.hive.hcatalog.data.Pair)3 ExecJob (org.apache.pig.backend.executionengine.ExecJob)3