Example 1 with Schema

Use of org.apache.hadoop.hive.llap.Schema in project hive by apache.

From the class LlapInputSplit, the readFields method:

@Override
public void readFields(DataInput in) throws IOException {
    splitNum = in.readInt();
    // Length-prefixed byte arrays: the serialized plan and fragment.
    int length = in.readInt();
    planBytes = new byte[length];
    in.readFully(planBytes);
    length = in.readInt();
    fragmentBytes = new byte[length];
    in.readFully(fragmentBytes);
    // Optional fragment signature; a zero length means it was absent.
    length = in.readInt();
    if (length > 0) {
        fragmentBytesSignature = new byte[length];
        in.readFully(fragmentBytesSignature);
    }
    // Split locations, stored as host strings (false = not in-memory).
    length = in.readInt();
    locations = new SplitLocationInfo[length];
    for (int i = 0; i < length; ++i) {
        locations[i] = new SplitLocationInfo(in.readUTF(), false);
    }
    // The llap Schema deserializes itself from the same stream.
    schema = new Schema();
    schema.readFields(in);
    llapUser = in.readUTF();
    // Optional delegation token bytes; again, zero length means absent.
    length = in.readInt();
    if (length > 0) {
        tokenBytes = new byte[length];
        in.readFully(tokenBytes);
    }
}
Also used: SplitLocationInfo(org.apache.hadoop.mapred.SplitLocationInfo), Schema(org.apache.hadoop.hive.llap.Schema)
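
A Writable must emit fields in exactly the order readFields consumes them. The matching write method is not shown on this page; below is a minimal sketch, assuming the same field names as the reader above and that an absent signature or token array is encoded as a zero length.

@Override
public void write(DataOutput out) throws IOException {
    out.writeInt(splitNum);
    // Length-prefixed byte arrays, mirroring the reader.
    out.writeInt(planBytes.length);
    out.write(planBytes);
    out.writeInt(fragmentBytes.length);
    out.write(fragmentBytes);
    if (fragmentBytesSignature != null) {
        out.writeInt(fragmentBytesSignature.length);
        out.write(fragmentBytesSignature);
    } else {
        // Zero length tells readFields there is no signature.
        out.writeInt(0);
    }
    out.writeInt(locations.length);
    for (SplitLocationInfo location : locations) {
        out.writeUTF(location.getLocation());
    }
    schema.write(out);
    out.writeUTF(llapUser);
    if (tokenBytes != null) {
        out.writeInt(tokenBytes.length);
        out.write(tokenBytes);
    } else {
        // Zero length tells readFields there is no token.
        out.writeInt(0);
    }
}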

Example 2 with Schema

Use of org.apache.hadoop.hive.llap.Schema in project hive by apache.

From the class GenericUDTFGetSplits, the process method:

@Override
public void process(Object[] arguments) throws HiveException {
    String query = stringOI.getPrimitiveJavaObject(arguments[0]);
    int num = intOI.get(arguments[1]);
    // Generate applicationId for the LLAP splits
    LlapCoordinator coordinator = LlapCoordinator.getInstance();
    if (coordinator == null) {
        throw new HiveException("LLAP coordinator is not initialized; must be running in HS2 with " + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
    }
    ApplicationId applicationId = coordinator.createExtClientAppId();
    LOG.info("Generated appID {} for LLAP splits", applicationId.toString());
    PlanFragment fragment = createPlanFragment(query, num, applicationId);
    TezWork tezWork = fragment.work;
    Schema schema = fragment.schema;
    try {
        for (InputSplit s : getSplits(jc, num, tezWork, schema, applicationId)) {
            Object[] os = new Object[1];
            bos.reset();
            s.write(dos);
            byte[] frozen = bos.toByteArray();
            os[0] = frozen;
            forward(os);
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used: HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), Schema(org.apache.hadoop.hive.llap.Schema), TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint), LlapCoordinator(org.apache.hadoop.hive.llap.coordinator.LlapCoordinator), LoginException(javax.security.auth.login.LoginException), URISyntaxException(java.net.URISyntaxException), UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException), FileNotFoundException(java.io.FileNotFoundException), UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException), UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException), IOException(java.io.IOException), ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId), LlapInputSplit(org.apache.hadoop.hive.llap.LlapInputSplit), InputSplit(org.apache.hadoop.mapred.InputSplit), TezWork(org.apache.hadoop.hive.ql.plan.TezWork)
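
Each split is forwarded as a frozen byte[] written through the DataOutputStream dos that wraps the ByteArrayOutputStream bos. A consumer on the other side reverses this with the readFields logic from Example 1. A minimal sketch, assuming LlapInputSplit exposes a no-arg constructor (required by Writable) and a getSchema() accessor:

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import org.apache.hadoop.hive.llap.LlapInputSplit;
import org.apache.hadoop.hive.llap.Schema;

// Hypothetical client-side helper: thaws one forwarded row back into a split.
static LlapInputSplit decodeSplit(byte[] frozen) throws IOException {
    LlapInputSplit split = new LlapInputSplit();
    try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(frozen))) {
        split.readFields(dis);
    }
    // The Schema travels inside the split, so the client can inspect columns.
    Schema schema = split.getSchema();
    return split;
}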

Example 3 with Schema

Use of org.apache.hadoop.hive.llap.Schema in project hive by apache.

From the class GenericUDTFGetSplits, the createPlanFragment method:

public PlanFragment createPlanFragment(String query, int num, ApplicationId splitsAppId) throws HiveException {
    HiveConf conf = new HiveConf(SessionState.get().getConf());
    HiveConf.setVar(conf, ConfVars.HIVEFETCHTASKCONVERSION, "none");
    HiveConf.setVar(conf, ConfVars.HIVEQUERYRESULTFILEFORMAT, PlanUtils.LLAP_OUTPUT_FORMAT_KEY);
    String originalMode = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_MODE);
    HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
    HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
    HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
    conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
    // Tez/LLAP requires RPC query plan
    HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
    try {
        jc = DagUtils.getInstance().createConfiguration(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    // Instantiate Driver to compile the query passed in.
    // This UDF is running as part of an existing query, which may already be using the
    // SessionState TxnManager. If this new Driver also tries to use the same TxnManager
    // then this may mess up the existing state of the TxnManager.
    // So initialize the new Driver with a new TxnManager so that it does not use the
    // Session TxnManager that is already in use.
    HiveTxnManager txnManager = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
    Driver driver = new Driver(new QueryState.Builder().withHiveConf(conf).nonIsolated().build(), null, null, txnManager);
    DriverCleanup driverCleanup = new DriverCleanup(driver, txnManager, splitsAppId.toString());
    boolean needsCleanup = true;
    try {
        CommandProcessorResponse cpr = driver.compileAndRespond(query);
        if (cpr.getResponseCode() != 0) {
            throw new HiveException("Failed to compile query: " + cpr.getException());
        }
        QueryPlan plan = driver.getPlan();
        List<Task<?>> roots = plan.getRootTasks();
        Schema schema = convertSchema(plan.getResultSchema());
        if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
            throw new HiveException("Was expecting a single TezTask.");
        }
        TezWork tezWork = ((TezTask) roots.get(0)).getWork();
        if (tezWork.getAllWork().size() != 1) {
            String tableName = "table_" + UUID.randomUUID().toString().replaceAll("[^A-Za-z0-9 ]", "");
            String ctas = "create temporary table " + tableName + " as " + query;
            LOG.info("Materializing the query for LLAPIF; CTAS: " + ctas);
            driver.releaseResources();
            HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, originalMode);
            cpr = driver.run(ctas, false);
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
            query = "select * from " + tableName;
            cpr = driver.compileAndRespond(query);
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            plan = driver.getPlan();
            roots = plan.getRootTasks();
            schema = convertSchema(plan.getResultSchema());
            if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
                throw new HiveException("Was expecting a single TezTask.");
            }
            tezWork = ((TezTask) roots.get(0)).getWork();
        } else {
            // The read will have READ_COMMITTED level semantics.
            try {
                driver.lockAndRespond();
            } catch (CommandProcessorResponse cpr1) {
                throw new HiveException("Failed to acquire locks", cpr1);
            }
            // Attach the resources to the session cleanup.
            SessionState.get().addCleanupItem(driverCleanup);
            needsCleanup = false;
        }
        // Pass the ValidTxnList and ValidTxnWriteIdList snapshot configurations corresponding to the input query
        HiveConf driverConf = driver.getConf();
        String validTxnString = driverConf.get(ValidTxnList.VALID_TXNS_KEY);
        if (validTxnString != null) {
            jc.set(ValidTxnList.VALID_TXNS_KEY, validTxnString);
        }
        String validWriteIdString = driverConf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
        if (validWriteIdString != null) {
            jc.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY, validWriteIdString);
        }
        return new PlanFragment(tezWork, schema, jc);
    } finally {
        if (needsCleanup) {
            if (driverCleanup != null) {
                try {
                    driverCleanup.close();
                } catch (IOException err) {
                    throw new HiveException(err);
                }
            } else if (driver != null) {
                driver.close();
                driver.destroy();
            }
        }
    }
}
Also used: TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask), Task(org.apache.hadoop.hive.ql.exec.Task), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), TaskSpecBuilder(org.apache.tez.dag.api.TaskSpecBuilder), CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), Schema(org.apache.hadoop.hive.llap.Schema), Driver(org.apache.hadoop.hive.ql.Driver), IOException(java.io.IOException), QueryPlan(org.apache.hadoop.hive.ql.QueryPlan), HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager), HiveConf(org.apache.hadoop.hive.conf.HiveConf), TezWork(org.apache.hadoop.hive.ql.plan.TezWork)
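
Both versions of createPlanFragment call convertSchema(plan.getResultSchema()) to turn the driver's metastore result schema into an llap Schema. That helper is not shown on this page; below is a plausible sketch, assuming an llap FieldDesc pairs a column name with a TypeInfo parsed from the metastore type string (consistent with the FieldDesc and FieldSchema entries in the Aggregations list at the end of this page):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.llap.FieldDesc;
import org.apache.hadoop.hive.llap.Schema;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Hypothetical reconstruction: one llap FieldDesc per metastore column.
private Schema convertSchema(org.apache.hadoop.hive.metastore.api.Schema metaSchema) {
    List<FieldDesc> colDescs = new ArrayList<>();
    for (FieldSchema fs : metaSchema.getFieldSchemas()) {
        colDescs.add(new FieldDesc(fs.getName(),
            TypeInfoUtils.getTypeInfoFromTypeString(fs.getType())));
    }
    return new Schema(colDescs);
}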

Example 4 with Schema

Use of org.apache.hadoop.hive.llap.Schema in project hive by apache.

From the class LlapRowRecordReader, the setRowFromStruct method:

static void setRowFromStruct(Row row, Object structVal, StructObjectInspector soi) {
    Schema structSchema = row.getSchema();
    // Add struct field data to the Row
    List<? extends StructField> structFields = soi.getAllStructFieldRefs();
    for (int idx = 0; idx < structFields.size(); ++idx) {
        StructField structField = structFields.get(idx);
        Object convertedFieldValue = convertValue(soi.getStructFieldData(structVal, structField), structField.getFieldObjectInspector());
        row.setValue(idx, convertedFieldValue);
    }
}
Also used: StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField), Schema(org.apache.hadoop.hive.llap.Schema)
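
setRowFromStruct delegates per-field conversion to a convertValue helper that is not shown on this page. A minimal sketch of the primitive case, assuming the real implementation also recurses into the complex categories (LIST, MAP, STRUCT) that this sketch omits:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;

// Hypothetical sketch: unwrap a primitive field into a plain Java object.
static Object convertValue(Object val, ObjectInspector oi) {
    if (val == null) {
        return null;
    }
    switch (oi.getCategory()) {
        case PRIMITIVE:
            return ((PrimitiveObjectInspector) oi).getPrimitiveJavaObject(val);
        default:
            // LIST/MAP/STRUCT conversion omitted from this sketch.
            throw new UnsupportedOperationException("Unhandled category: " + oi.getCategory());
    }
}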

Example 5 with Schema

Use of org.apache.hadoop.hive.llap.Schema in project hive by apache.

From the class GenericUDTFGetSplits, an alternate createPlanFragment overload that takes no ApplicationId:

public PlanFragment createPlanFragment(String query, int num) throws HiveException {
    HiveConf conf = new HiveConf(SessionState.get().getConf());
    HiveConf.setVar(conf, ConfVars.HIVEFETCHTASKCONVERSION, "none");
    HiveConf.setVar(conf, ConfVars.HIVEQUERYRESULTFILEFORMAT, PlanUtils.LLAP_OUTPUT_FORMAT_KEY);
    String originalMode = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_MODE);
    HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
    HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
    HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
    conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
    // Tez/LLAP requires RPC query plan
    HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
    try {
        jc = DagUtils.getInstance().createConfiguration(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    Driver driver = new Driver(conf);
    try {
        CommandProcessorResponse cpr = driver.compileAndRespond(query);
        if (cpr.getResponseCode() != 0) {
            throw new HiveException("Failed to compile query: " + cpr.getException());
        }
        QueryPlan plan = driver.getPlan();
        List<Task<?>> roots = plan.getRootTasks();
        Schema schema = convertSchema(plan.getResultSchema());
        if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
            throw new HiveException("Was expecting a single TezTask.");
        }
        TezWork tezWork = ((TezTask) roots.get(0)).getWork();
        if (tezWork.getAllWork().size() != 1) {
            String tableName = "table_" + UUID.randomUUID().toString().replaceAll("[^A-Za-z0-9 ]", "");
            String ctas = "create temporary table " + tableName + " as " + query;
            LOG.info("Materializing the query for LLAPIF; CTAS: " + ctas);
            try {
                driver.resetQueryState();
                HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, originalMode);
                cpr = driver.run(ctas, false);
            } catch (CommandNeedRetryException e) {
                throw new HiveException(e);
            }
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
            query = "select * from " + tableName;
            cpr = driver.compileAndRespond(query);
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            plan = driver.getPlan();
            roots = plan.getRootTasks();
            schema = convertSchema(plan.getResultSchema());
            if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
                throw new HiveException("Was expecting a single TezTask.");
            }
            tezWork = ((TezTask) roots.get(0)).getWork();
        }
        return new PlanFragment(tezWork, schema, jc);
    } finally {
        driver.close();
        driver.destroy();
    }
}
Also used: TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask), Task(org.apache.hadoop.hive.ql.exec.Task), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), Schema(org.apache.hadoop.hive.llap.Schema), Driver(org.apache.hadoop.hive.ql.Driver), IOException(java.io.IOException), QueryPlan(org.apache.hadoop.hive.ql.QueryPlan), CommandNeedRetryException(org.apache.hadoop.hive.ql.CommandNeedRetryException), HiveConf(org.apache.hadoop.hive.conf.HiveConf), TezWork(org.apache.hadoop.hive.ql.plan.TezWork)
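
Both createPlanFragment overloads finish with new PlanFragment(tezWork, schema, jc), and process (Example 2) reads fragment.work and fragment.schema back out. The holder itself is not shown on this page; a plausible sketch, assuming it is a plain public-field value class nested inside GenericUDTFGetSplits:

// Hypothetical reconstruction of the value holder returned by createPlanFragment.
// TezWork is org.apache.hadoop.hive.ql.plan.TezWork, Schema is
// org.apache.hadoop.hive.llap.Schema, and JobConf is org.apache.hadoop.mapred.JobConf.
public static class PlanFragment {
    public TezWork work;
    public Schema schema;
    public JobConf jc;

    public PlanFragment(TezWork work, Schema schema, JobConf jc) {
        this.work = work;
        this.schema = schema;
        this.jc = jc;
    }
}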

Aggregations

Schema (org.apache.hadoop.hive.llap.Schema): 7 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 4 uses
IOException (java.io.IOException): 3 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3 uses
TezWork (org.apache.hadoop.hive.ql.plan.TezWork): 3 uses
ArrayList (java.util.ArrayList): 2 uses
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 2 uses
FieldDesc (org.apache.hadoop.hive.llap.FieldDesc): 2 uses
LlapInputSplit (org.apache.hadoop.hive.llap.LlapInputSplit): 2 uses
Driver (org.apache.hadoop.hive.ql.Driver): 2 uses
QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 2 uses
Task (org.apache.hadoop.hive.ql.exec.Task): 2 uses
TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask): 2 uses
CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse): 2 uses
SplitLocationInfo (org.apache.hadoop.mapred.SplitLocationInfo): 2 uses
ByteArrayInputStream (java.io.ByteArrayInputStream): 1 use
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1 use
DataInputStream (java.io.DataInputStream): 1 use
DataOutputStream (java.io.DataOutputStream): 1 use
FileNotFoundException (java.io.FileNotFoundException): 1 use