
Example 16 with HCatFieldSchema

Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

From the class TestCommands, the method testMetadataReplEximCommands.

@Test
public void testMetadataReplEximCommands() throws IOException, CommandNeedRetryException {
    // repl metadata export, has repl.last.id and repl.scope=metadata
    // import repl metadata dump, table metadata changed, allows override, has repl.last.id
    int evid = 222;
    String exportLocation = TEST_PATH + File.separator + "testMetadataReplExim";
    Path tempPath = new Path(TEST_PATH, "testMetadataReplEximTmp");
    String tempLocation = tempPath.toUri().getPath();
    String dbName = "exim";
    String tableName = "basicSrc";
    String importedTableName = "basicDst";
    List<HCatFieldSchema> cols = HCatSchemaUtils.getHCatSchema("b:string").getFields();
    client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
    client.createDatabase(HCatCreateDBDesc.create(dbName).ifNotExists(false).build());
    HCatTable table = (new HCatTable(dbName, tableName)).cols(cols).fileFormat("textfile");
    client.createTable(HCatCreateTableDesc.create(table).build());
    HCatTable t = client.getTable(dbName, tableName);
    assertNotNull(t);
    String[] data = new String[] { "eleven", "twelve" };
    HcatTestUtils.createTestDataFile(tempLocation, data);
    CommandProcessorResponse ret = driver.run("LOAD DATA LOCAL INPATH '" + tempLocation + "' OVERWRITE INTO TABLE " + dbName + "." + tableName);
    assertEquals(ret.getResponseCode() + ":" + ret.getErrorMessage(), null, ret.getException());
    CommandProcessorResponse selectRet = driver.run("SELECT * from " + dbName + "." + tableName);
    assertEquals(selectRet.getResponseCode() + ":" + selectRet.getErrorMessage(), null, selectRet.getException());
    List<String> values = new ArrayList<String>();
    driver.getResults(values);
    assertEquals(2, values.size());
    assertEquals(data[0], values.get(0));
    assertEquals(data[1], values.get(1));
    ExportCommand exportMdCmd = new ExportCommand(dbName, tableName, null, exportLocation, true, evid);
    LOG.info("About to run :" + exportMdCmd.get().get(0));
    CommandProcessorResponse ret2 = driver.run(exportMdCmd.get().get(0));
    assertEquals(ret2.getResponseCode() + ":" + ret2.getErrorMessage(), null, ret2.getException());
    List<String> exportPaths = exportMdCmd.cleanupLocationsAfterEvent();
    assertEquals(1, exportPaths.size());
    String metadata = getMetadataContents(exportPaths.get(0));
    LOG.info("Export returned the following _metadata contents:");
    LOG.info(metadata);
    assertTrue(metadata + "did not match \"repl.scope\"=\"metadata\"", metadata.matches(".*\"repl.scope\":\"metadata\".*"));
    assertTrue(metadata + "has \"repl.last.id\"", metadata.matches(".*\"repl.last.id\":.*"));
    ImportCommand importMdCmd = new ImportCommand(dbName, importedTableName, null, exportLocation, true, evid);
    LOG.info("About to run :" + importMdCmd.get().get(0));
    CommandProcessorResponse ret3 = driver.run(importMdCmd.get().get(0));
    assertEquals(ret3.getResponseCode() + ":" + ret3.getErrorMessage(), null, ret3.getException());
    CommandProcessorResponse selectRet2 = driver.run("SELECT * from " + dbName + "." + importedTableName);
    assertEquals(selectRet2.getResponseCode() + ":" + selectRet2.getErrorMessage(), null, selectRet2.getException());
    List<String> values2 = new ArrayList<String>();
    driver.getResults(values2);
    assertEquals(0, values2.size());
    HCatTable importedTable = client.getTable(dbName, importedTableName);
    assertNotNull(importedTable);
    assertTrue(importedTable.getTblProps().containsKey("repl.last.id"));
}
Also used : Path(org.apache.hadoop.fs.Path) CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) ArrayList(java.util.ArrayList) HCatTable(org.apache.hive.hcatalog.api.HCatTable) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)
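
The schema setup at the top of this test is the part that actually exercises HCatFieldSchema: the column list is derived from a Hive-style schema string through HCatSchemaUtils and attached to the HCatTable descriptor. That step on its own, as a minimal sketch using only calls that appear above (the helper name is ours, and client is assumed to be an already-connected HCatClient):

// Hypothetical helper mirroring the setup above: build a one-column schema from a
// Hive-style schema string and create a text-format table with it.
static void createSingleColumnTable(HCatClient client, String dbName, String tableName) throws HCatException {
    List<HCatFieldSchema> cols = HCatSchemaUtils.getHCatSchema("b:string").getFields();
    HCatTable table = new HCatTable(dbName, tableName).cols(cols).fileFormat("textfile");
    client.createTable(HCatCreateTableDesc.create(table).build());
}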

Example 17 with HCatFieldSchema

Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

From the class StoreNumbers, the method main.

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String[] otherArgs = new String[2];
    int j = 0;
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-libjars")) {
            // generic options parser doesn't seem to work!
            conf.set("tmpjars", args[i + 1]);
            // skip the jar list value; incrementing i makes the loop move past it
            i = i + 1;
        } else {
            otherArgs[j++] = args[i];
        }
    }
    if (j != 2) {
        usage();
    }
    String serverUri = otherArgs[0];
    if (otherArgs[1] == null || (!otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart") && !otherArgs[1].equalsIgnoreCase("nopart_pig"))) {
        usage();
    }
    boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part"));
    boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig"));
    String tableName = NUMBERS_TABLE_NAME;
    String dbName = "default";
    Map<String, String> outputPartitionKvps = new HashMap<String, String>();
    String outputTableName = null;
    conf.set(IS_PIG_NON_PART_TABLE, "false");
    if (writeToPartitionedTable) {
        outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
        outputPartitionKvps.put("datestamp", "20100101");
    } else {
        if (writeToNonPartPigTable) {
            conf.set(IS_PIG_NON_PART_TABLE, "true");
            outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME;
        } else {
            outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME;
        }
        // test with null or empty randomly
        if (new Random().nextInt(2) == 0) {
            outputPartitionKvps = null;
        }
    }
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null)
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    Job job = new Job(conf, "storenumbers");
    // initialize HCatInputFormat
    HCatInputFormat.setInput(job, dbName, tableName);
    // initialize HCatOutputFormat
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, outputPartitionKvps));
    // test with and without specifying schema randomly
    HCatSchema s = HCatInputFormat.getTableSchema(job);
    if (writeToNonPartPigTable) {
        List<HCatFieldSchema> newHfsList = new ArrayList<HCatFieldSchema>();
        // change smallint and tinyint to int
        for (HCatFieldSchema hfs : s.getFields()) {
            if (hfs.getTypeString().equals("smallint")) {
                newHfsList.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
            } else if (hfs.getTypeString().equals("tinyint")) {
                newHfsList.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
            } else {
                newHfsList.add(hfs);
            }
        }
        s = new HCatSchema(newHfsList);
    }
    HCatOutputFormat.setSchema(job, s);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setJarByClass(StoreNumbers.class);
    job.setMapperClass(SumMapper.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputValueClass(DefaultHCatRecord.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Random(java.util.Random) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) Job(org.apache.hadoop.mapreduce.Job) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser)
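
The HCatFieldSchema-specific piece of this driver is the loop that rewrites smallint and tinyint columns as int before the schema is handed to HCatOutputFormat.setSchema; in the example it only runs in the writeToNonPartPigTable branch. The same logic factored into a small helper, purely as a readability sketch (the method name is ours):

// Return a copy of the schema in which smallint and tinyint fields are widened to int,
// keeping each field's name and comment; all other fields are carried over unchanged.
static HCatSchema widenSmallIntsToInt(HCatSchema input) throws HCatException {
    List<HCatFieldSchema> widened = new ArrayList<HCatFieldSchema>();
    for (HCatFieldSchema hfs : input.getFields()) {
        String type = hfs.getTypeString();
        if (type.equals("smallint") || type.equals("tinyint")) {
            widened.add(new HCatFieldSchema(hfs.getName(), HCatFieldSchema.Type.INT, hfs.getComment()));
        } else {
            widened.add(hfs);
        }
    }
    return new HCatSchema(widened);
}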

Example 18 with HCatFieldSchema

Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

From the class WriteTextPartitioned, the method run.

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String serverUri = args[0];
    String inputTableName = args[1];
    String outputTableName = args[2];
    if (args.length > 3)
        filter = args[3];
    String dbName = null;
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null)
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    Job job = new Job(conf, "WriteTextPartitioned");
    HCatInputFormat.setInput(job, dbName, inputTableName, filter);
    // HCatOutputFormat is initialized below, after the partition values are derived from the filter
    job.setInputFormatClass(HCatInputFormat.class);
    job.setJarByClass(WriteTextPartitioned.class);
    job.setMapperClass(Map.class);
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    java.util.Map<String, String> partitionVals = null;
    if (filter != null) {
        String[] s = filter.split("=");
        String val = s[1].replace('"', ' ').trim();
        partitionVals = new HashMap<String, String>(1);
        partitionVals.put(s[0], val);
    }
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, partitionVals));
    HCatSchema s = HCatInputFormat.getTableSchema(job);
    // Build the schema for this table, which is slightly different from the
    // schema of the input table
    List<HCatFieldSchema> fss = new ArrayList<HCatFieldSchema>(3);
    fss.add(s.get(0));
    fss.add(s.get(1));
    fss.add(s.get(3));
    HCatOutputFormat.setSchema(job, new HCatSchema(fss));
    job.setOutputFormatClass(HCatOutputFormat.class);
    return (job.waitForCompletion(true) ? 0 : 1);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) ArrayList(java.util.ArrayList) Job(org.apache.hadoop.mapreduce.Job) GenericOptionsParser(org.apache.hadoop.util.GenericOptionsParser) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
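
The output schema here is a simple projection of the input table's schema: three HCatFieldSchema entries are copied out by position into a new HCatSchema, skipping index 2. The same idea as a generic sketch (the helper name and its varargs signature are ours):

// Build an output schema from selected positions of an input schema.
static HCatSchema projectFields(HCatSchema input, int... positions) throws HCatException {
    List<HCatFieldSchema> fields = new ArrayList<HCatFieldSchema>(positions.length);
    for (int pos : positions) {
        fields.add(input.get(pos));
    }
    return new HCatSchema(fields);
}

With such a helper, the three fss.add calls above would collapse to HCatOutputFormat.setSchema(job, projectFields(s, 0, 1, 3)).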

Example 19 with HCatFieldSchema

Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

From the class TestPigHCatUtil, the method testGetBagSubSchema.

@Test
public void testGetBagSubSchema() throws Exception {
    // Define the expected schema.
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple").setDescription("The tuple in the bag").setType(DataType.TUPLE);
    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
    innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY);
    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
    // Get the actual converted schema.
    HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList(new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema hCatFieldSchema = new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null);
    ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema);
    Assert.assertEquals(expected.toString(), actual.toString());
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)
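
The construction that is easy to misread here is the complex field: the array's element type is first wrapped in its own HCatSchema, and that schema is then passed as the sub-schema of the ARRAY field. Isolated, with placeholder field names (both constructors throw HCatException, so the snippet belongs in a method that declares it):

// An array-of-string field: wrap the element schema first, then use it as the array's sub-schema.
HCatFieldSchema element = new HCatFieldSchema("element", HCatFieldSchema.Type.STRING, null);
HCatSchema elementSchema = new HCatSchema(Lists.newArrayList(element));
HCatFieldSchema arrayField = new HCatFieldSchema("values", HCatFieldSchema.Type.ARRAY, elementSchema, null);

getBagSubSchema then presents such a field to Pig as a bag of single-field tuples, which is exactly what the expected ResourceSchema at the top of the test spells out.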

Example 20 with HCatFieldSchema

Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

From the class PigHCatUtil, the method transformToBag.

private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception {
    if (list == null) {
        return null;
    }
    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    DataBag db = new DefaultDataBag();
    for (Object o : list) {
        Tuple tuple;
        if (elementSubFieldSchema.getType() == Type.STRUCT) {
            tuple = transformToTuple((List<?>) o, elementSubFieldSchema);
        } else {
            // bags always contain tuples
            tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
        }
        db.add(tuple);
    }
    return db;
}
Also used : DataBag(org.apache.pig.data.DataBag) DefaultDataBag(org.apache.pig.data.DefaultDataBag) ArrayList(java.util.ArrayList) List(java.util.List) DefaultDataBag(org.apache.pig.data.DefaultDataBag) Tuple(org.apache.pig.data.Tuple) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
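
The HCatFieldSchema call doing the work here is getArrayElementSchema(): for an ARRAY field it returns a one-field HCatSchema describing the element type, which is why the method reads getFields().get(0). That lookup on its own, as a small sketch (the helper name is ours; arrayField is assumed to be ARRAY-typed, for instance the field built in the previous example):

// Decide whether the elements of an array field are structs (and so become tuples)
// or primitives (and so are converted directly), mirroring the branch above.
static boolean elementsAreStructs(HCatFieldSchema arrayField) throws HCatException {
    HCatFieldSchema elementField = arrayField.getArrayElementSchema().getFields().get(0);
    return elementField.getType() == HCatFieldSchema.Type.STRUCT;
}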

Aggregations

HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 61
ArrayList (java.util.ArrayList): 34
Test (org.junit.Test): 30
HCatException (org.apache.hive.hcatalog.common.HCatException): 22
IOException (java.io.IOException): 21
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 21
HashMap (java.util.HashMap): 19
Configuration (org.apache.hadoop.conf.Configuration): 18
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 15
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 7
ResourceSchema (org.apache.pig.ResourceSchema): 6
HCatTable (org.apache.hive.hcatalog.api.HCatTable): 5
ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema): 5
Map (java.util.Map): 4
Properties (java.util.Properties): 4
Path (org.apache.hadoop.fs.Path): 4
List (java.util.List): 3
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 3
CommandNeedRetryException (org.apache.hadoop.hive.ql.CommandNeedRetryException): 3
FrontendException (org.apache.pig.impl.logicalLayer.FrontendException): 3