Example 36 with FileSplit

Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

From the class FilePartition, the method get:

@Override
protected void get(IServletRequest request, IServletResponse response) {
    response.setStatus(HttpResponseStatus.OK);
    try {
        HttpUtil.setContentType(response, HttpUtil.ContentType.APPLICATION_JSON, HttpUtil.Encoding.UTF8);
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, "Failure setting content type", e);
        response.setStatus(HttpResponseStatus.INTERNAL_SERVER_ERROR);
        response.writer().write(e.toString());
        return;
    }
    PrintWriter out = response.writer();
    try {
        ObjectMapper om = new ObjectMapper();
        ObjectNode jsonResponse = om.createObjectNode();
        String dataverseName = request.getParameter("dataverseName");
        String datasetName = request.getParameter("datasetName");
        if (dataverseName == null || datasetName == null) {
            jsonResponse.put("error", "Parameter dataverseName or datasetName is null,");
            out.write(jsonResponse.toString());
            return;
        }
        IHyracksClientConnection hcc = (IHyracksClientConnection) ctx.get(HYRACKS_CONNECTION_ATTR);
        // Metadata transaction begins.
        MetadataManager.INSTANCE.init();
        MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
        // Retrieves file splits of the dataset.
        MetadataProvider metadataProvider = new MetadataProvider(appCtx, null, new StorageComponentProvider());
        try {
            metadataProvider.setMetadataTxnContext(mdTxnCtx);
            Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
            if (dataset == null) {
                jsonResponse.put("error", "Dataset " + datasetName + " does not exist in " + "dataverse " + dataverseName);
                out.write(jsonResponse.toString());
                out.flush();
                return;
            }
            boolean temp = dataset.getDatasetDetails().isTemp();
            FileSplit[] fileSplits = metadataProvider.splitsForIndex(mdTxnCtx, dataset, datasetName);
            ARecordType recordType = (ARecordType) metadataProvider.findType(dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
            List<List<String>> primaryKeys = dataset.getPrimaryKeys();
            StringBuilder pkStrBuf = new StringBuilder();
            for (List<String> keys : primaryKeys) {
                for (String key : keys) {
                    pkStrBuf.append(key).append(",");
                }
            }
            if (pkStrBuf.length() > 0) {
                // Drop the trailing comma appended by the loop above.
                pkStrBuf.delete(pkStrBuf.length() - 1, pkStrBuf.length());
            }
            // Constructs the returned json object.
            formResponseObject(jsonResponse, fileSplits, recordType, pkStrBuf.toString(), temp, hcc.getNodeControllerInfos());
            // Flush the cached contents of the dataset to the file system.
            // The primary index shares the dataset's name, hence datasetName appears twice.
            FlushDatasetUtil.flushDataset(hcc, metadataProvider, dataverseName, datasetName, datasetName);
            // Metadata transaction commits.
            MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
            // Writes file splits.
            out.write(jsonResponse.toString());
        } finally {
            metadataProvider.getLocks().unlock();
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Failure handling a request", e);
        response.setStatus(HttpResponseStatus.INTERNAL_SERVER_ERROR);
        out.write(e.toString());
    } finally {
        out.flush();
    }
}
Also used : IHyracksClientConnection(org.apache.hyracks.api.client.IHyracksClientConnection) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) Dataset(org.apache.asterix.metadata.entities.Dataset) MetadataTransactionContext(org.apache.asterix.metadata.MetadataTransactionContext) StorageComponentProvider(org.apache.asterix.file.StorageComponentProvider) IOException(java.io.IOException) FileSplit(org.apache.hyracks.api.io.FileSplit) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) List(java.util.List) ARecordType(org.apache.asterix.om.types.ARecordType) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) PrintWriter(java.io.PrintWriter)
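
For comparison with the test in Example 39, here is a minimal sketch of what a formResponseObject-style helper might do with the splits: resolve each split's node through the map returned by hcc.getNodeControllerInfos() and emit one ip/path pair per split. This is not the servlet's actual implementation (the real method also serializes the record type, elided here), and the helper name is hypothetical.

protected static void formResponseObjectSketch(ObjectNode result, FileSplit[] splits, String keys, boolean temp, Map<String, NodeControllerInfo> nodeMap) {
    result.put("temp", temp);
    result.put("keys", keys);
    ArrayNode splitsArray = result.putArray("splits");
    for (FileSplit split : splits) {
        // Resolve the split's owning node to its registered network address.
        NodeControllerInfo info = nodeMap.get(split.getNodeName());
        ObjectNode element = splitsArray.addObject();
        element.put("ip", info.getNetworkAddress().getAddress());
        element.put("path", split.getPath());
    }
}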

Example 37 with FileSplit

Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

From the class QueryTranslator, the method compileAndExecute:

@Override
public void compileAndExecute(IHyracksClientConnection hcc, IHyracksDataset hdc, ResultDelivery resultDelivery, ResultMetadata outMetadata, Stats stats, String clientContextId, IStatementExecutorContext ctx) throws Exception {
    int resultSetIdCounter = 0;
    FileSplit outputFile = null;
    IAWriterFactory writerFactory = PrinterBasedWriterFactory.INSTANCE;
    IResultSerializerFactoryProvider resultSerializerFactoryProvider = ResultSerializerFactoryProvider.INSTANCE;
    Map<String, String> config = new HashMap<>();
    /*
     * Since the system runs a large number of threads, when HTTP requests don't return it becomes
     * difficult to find the thread running the request and determine where it has stopped.
     * Setting the thread name makes that easier.
     */
    String threadName = Thread.currentThread().getName();
    Thread.currentThread().setName(QueryTranslator.class.getSimpleName());
    try {
        for (Statement stmt : statements) {
            if (sessionConfig.is(SessionConfig.FORMAT_HTML)) {
                sessionOutput.out().println(ApiServlet.HTML_STATEMENT_SEPARATOR);
            }
            validateOperation(appCtx, activeDataverse, stmt);
            // Rewrite the statement's AST.
            rewriteStatement(stmt);
            MetadataProvider metadataProvider = new MetadataProvider(appCtx, activeDataverse, componentProvider);
            metadataProvider.setWriterFactory(writerFactory);
            metadataProvider.setResultSerializerFactoryProvider(resultSerializerFactoryProvider);
            metadataProvider.setOutputFile(outputFile);
            metadataProvider.setConfig(config);
            switch (stmt.getKind()) {
                case Statement.Kind.SET:
                    handleSetStatement(stmt, config);
                    break;
                case Statement.Kind.DATAVERSE_DECL:
                    activeDataverse = handleUseDataverseStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.CREATE_DATAVERSE:
                    handleCreateDataverseStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.DATASET_DECL:
                    handleCreateDatasetStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.CREATE_INDEX:
                    handleCreateIndexStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.TYPE_DECL:
                    handleCreateTypeStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.NODEGROUP_DECL:
                    handleCreateNodeGroupStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.DATAVERSE_DROP:
                    handleDataverseDropStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.DATASET_DROP:
                    handleDatasetDropStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.INDEX_DROP:
                    handleIndexDropStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.TYPE_DROP:
                    handleTypeDropStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.NODEGROUP_DROP:
                    handleNodegroupDropStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.CREATE_FUNCTION:
                    handleCreateFunctionStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.FUNCTION_DROP:
                    handleFunctionDropStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.LOAD:
                    handleLoadStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.INSERT:
                case Statement.Kind.UPSERT:
                    if (((InsertStatement) stmt).getReturnExpression() != null) {
                        metadataProvider.setResultSetId(new ResultSetId(resultSetIdCounter++));
                        metadataProvider.setResultAsyncMode(resultDelivery == ResultDelivery.ASYNC || resultDelivery == ResultDelivery.DEFERRED);
                    }
                    handleInsertUpsertStatement(metadataProvider, stmt, hcc, hdc, resultDelivery, outMetadata, stats, false, clientContextId, ctx);
                    break;
                case Statement.Kind.DELETE:
                    handleDeleteStatement(metadataProvider, stmt, hcc, false);
                    break;
                case Statement.Kind.CREATE_FEED:
                    handleCreateFeedStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.DROP_FEED:
                    handleDropFeedStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.DROP_FEED_POLICY:
                    handleDropFeedPolicyStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.CONNECT_FEED:
                    handleConnectFeedStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.DISCONNECT_FEED:
                    handleDisconnectFeedStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.START_FEED:
                    handleStartFeedStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.STOP_FEED:
                    handleStopFeedStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.CREATE_FEED_POLICY:
                    handleCreateFeedPolicyStatement(metadataProvider, stmt);
                    break;
                case Statement.Kind.QUERY:
                    metadataProvider.setResultSetId(new ResultSetId(resultSetIdCounter++));
                    metadataProvider.setResultAsyncMode(resultDelivery == ResultDelivery.ASYNC || resultDelivery == ResultDelivery.DEFERRED);
                    handleQuery(metadataProvider, (Query) stmt, hcc, hdc, resultDelivery, outMetadata, stats, clientContextId, ctx);
                    break;
                case Statement.Kind.COMPACT:
                    handleCompactStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.EXTERNAL_DATASET_REFRESH:
                    handleExternalDatasetRefreshStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.WRITE:
                    Pair<IAWriterFactory, FileSplit> result = handleWriteStatement(stmt);
                    writerFactory = (result.first != null) ? result.first : writerFactory;
                    outputFile = result.second;
                    break;
                case Statement.Kind.RUN:
                    handleRunStatement(metadataProvider, stmt, hcc);
                    break;
                case Statement.Kind.FUNCTION_DECL:
                    // No op
                    break;
                case Statement.Kind.EXTENSION:
                    ((IExtensionStatement) stmt).handle(this, metadataProvider, hcc, hdc, resultDelivery, stats, resultSetIdCounter);
                    break;
                default:
                    throw new CompilationException("Unknown statement: " + stmt.getKind());
            }
        }
    } finally {
        Thread.currentThread().setName(threadName);
    }
}
Also used : IExtensionStatement(org.apache.asterix.algebra.extension.IExtensionStatement) CompilationException(org.apache.asterix.common.exceptions.CompilationException) HashMap(java.util.HashMap) IResultSerializerFactoryProvider(org.apache.hyracks.algebricks.data.IResultSerializerFactoryProvider) StopFeedStatement(org.apache.asterix.lang.common.statement.StopFeedStatement) FunctionDropStatement(org.apache.asterix.lang.common.statement.FunctionDropStatement) LoadStatement(org.apache.asterix.lang.common.statement.LoadStatement) CompiledInsertStatement(org.apache.asterix.translator.CompiledStatements.CompiledInsertStatement) CreateDataverseStatement(org.apache.asterix.lang.common.statement.CreateDataverseStatement) InsertStatement(org.apache.asterix.lang.common.statement.InsertStatement) CompiledLoadFromFileStatement(org.apache.asterix.translator.CompiledStatements.CompiledLoadFromFileStatement) CreateFeedPolicyStatement(org.apache.asterix.lang.common.statement.CreateFeedPolicyStatement) CreateIndexStatement(org.apache.asterix.lang.common.statement.CreateIndexStatement) RunStatement(org.apache.asterix.lang.common.statement.RunStatement) FeedPolicyDropStatement(org.apache.asterix.lang.common.statement.FeedPolicyDropStatement) Statement(org.apache.asterix.lang.common.base.Statement) DisconnectFeedStatement(org.apache.asterix.lang.common.statement.DisconnectFeedStatement) CompiledDeleteStatement(org.apache.asterix.translator.CompiledStatements.CompiledDeleteStatement) CreateFeedStatement(org.apache.asterix.lang.common.statement.CreateFeedStatement) DeleteStatement(org.apache.asterix.lang.common.statement.DeleteStatement) DataverseDropStatement(org.apache.asterix.lang.common.statement.DataverseDropStatement) TypeDropStatement(org.apache.asterix.lang.common.statement.TypeDropStatement) CompactStatement(org.apache.asterix.lang.common.statement.CompactStatement) StartFeedStatement(org.apache.asterix.lang.common.statement.StartFeedStatement) NodeGroupDropStatement(org.apache.asterix.lang.common.statement.NodeGroupDropStatement) RefreshExternalDatasetStatement(org.apache.asterix.lang.common.statement.RefreshExternalDatasetStatement) SetStatement(org.apache.asterix.lang.common.statement.SetStatement) CompiledUpsertStatement(org.apache.asterix.translator.CompiledStatements.CompiledUpsertStatement) ConnectFeedStatement(org.apache.asterix.lang.common.statement.ConnectFeedStatement) ICompiledDmlStatement(org.apache.asterix.translator.CompiledStatements.ICompiledDmlStatement) IndexDropStatement(org.apache.asterix.lang.common.statement.IndexDropStatement) CreateFunctionStatement(org.apache.asterix.lang.common.statement.CreateFunctionStatement) WriteStatement(org.apache.asterix.lang.common.statement.WriteStatement) IReturningStatement(org.apache.asterix.lang.common.base.IReturningStatement) DropDatasetStatement(org.apache.asterix.lang.common.statement.DropDatasetStatement) FeedDropStatement(org.apache.asterix.lang.common.statement.FeedDropStatement) FileSplit(org.apache.hyracks.api.io.FileSplit) UnmanagedFileSplit(org.apache.hyracks.api.io.UnmanagedFileSplit) DatasetNodegroupCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetNodegroupCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) ResultSetId(org.apache.hyracks.api.dataset.ResultSetId) IAWriterFactory(org.apache.hyracks.algebricks.data.IAWriterFactory)
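
The WRITE branch above is the only place outputFile is assigned; as Example 38 below shows, it produces an UnmanagedFileSplit, whereas dataset storage uses ManagedFileSplit. A short illustrative sketch of the difference between the two flavors (node name and paths are hypothetical):

// A managed split's path is resolved against the node's managed storage root;
// an unmanaged split's path is used verbatim as a host filesystem path.
FileSplit managed = new ManagedFileSplit("asterix_nc1", "storage" + File.separator + "mydataset");
FileSplit unmanaged = new UnmanagedFileSplit("asterix_nc1", "/tmp/query-output.adm");
// Both carry an owning node plus a path; only the path resolution differs.
System.out.println(managed.getNodeName() + " -> " + managed.getPath());
System.out.println(unmanaged.getNodeName() + " -> " + unmanaged.getPath());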

Example 38 with FileSplit

Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

From the class QueryTranslator, the method handleWriteStatement:

protected Pair<IAWriterFactory, FileSplit> handleWriteStatement(Statement stmt) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
    WriteStatement ws = (WriteStatement) stmt;
    File f = new File(ws.getFileName());
    FileSplit outputFile = new UnmanagedFileSplit(ws.getNcName().getValue(), f.getPath());
    IAWriterFactory writerFactory = null;
    if (ws.getWriterClassName() != null) {
        writerFactory = (IAWriterFactory) Class.forName(ws.getWriterClassName()).newInstance();
    }
    return new Pair<>(writerFactory, outputFile);
}
Also used : WriteStatement(org.apache.asterix.lang.common.statement.WriteStatement) FileSplit(org.apache.hyracks.api.io.FileSplit) UnmanagedFileSplit(org.apache.hyracks.api.io.UnmanagedFileSplit) ExternalFile(org.apache.asterix.external.indexing.ExternalFile) File(java.io.File) IAWriterFactory(org.apache.hyracks.algebricks.data.IAWriterFactory) Pair(org.apache.hyracks.algebricks.common.utils.Pair)
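
Note that Class.newInstance() is deprecated on newer JDKs. A sketch of the modern replacement, assuming the method's throws clause is widened accordingly (getDeclaredConstructor().newInstance() surfaces NoSuchMethodException and InvocationTargetException instead of unwrapped constructor exceptions):

writerFactory = (IAWriterFactory) Class.forName(ws.getWriterClassName())
        .getDeclaredConstructor().newInstance();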

Example 39 with FileSplit

Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

From the class ConnectorApiServletTest, the method testFormResponseObject:

@Test
public void testFormResponseObject() throws Exception {
    ConnectorApiServlet let = new ConnectorApiServlet(new ConcurrentHashMap<>(), new String[] { "/" }, (ICcApplicationContext) ExecutionTestUtil.integrationUtil.cc.getApplicationContext());
    ObjectMapper om = new ObjectMapper();
    ObjectNode actualResponse = om.createObjectNode();
    FileSplit[] splits = new FileSplit[2];
    splits[0] = new ManagedFileSplit("asterix_nc1", "foo1");
    splits[1] = new ManagedFileSplit("asterix_nc2", "foo2");
    Map<String, NodeControllerInfo> nodeMap = new HashMap<>();
    NodeControllerInfo mockInfo1 = mock(NodeControllerInfo.class);
    NodeControllerInfo mockInfo2 = mock(NodeControllerInfo.class);
    // Sets up mock returns.
    when(mockInfo1.getNetworkAddress()).thenReturn(new NetworkAddress("127.0.0.1", 3099));
    when(mockInfo2.getNetworkAddress()).thenReturn(new NetworkAddress("127.0.0.2", 3099));
    String[] fieldNames = new String[] { "a1", "a2" };
    IAType[] fieldTypes = new IAType[] { BuiltinType.ABOOLEAN, BuiltinType.ADAYTIMEDURATION };
    ARecordType recordType = new ARecordType("record", fieldNames, fieldTypes, true);
    String primaryKey = "a1";
    // Calls ConnectorAPIServlet.formResponseObject.
    nodeMap.put("asterix_nc1", mockInfo1);
    nodeMap.put("asterix_nc2", mockInfo2);
    PA.invokeMethod(let, "formResponseObject(" + ObjectNode.class.getName() + ", " + FileSplit.class.getName() + "[], " + ARecordType.class.getName() + ", " + String.class.getName() + ", boolean, " + Map.class.getName() + ")", actualResponse, splits, recordType, primaryKey, true, nodeMap);
    // Constructs expected response.
    ObjectNode expectedResponse = om.createObjectNode();
    expectedResponse.put("temp", true);
    expectedResponse.put("keys", primaryKey);
    expectedResponse.set("type", recordType.toJSON());
    ArrayNode splitsArray = om.createArrayNode();
    ObjectNode element1 = om.createObjectNode();
    element1.put("ip", "127.0.0.1");
    element1.put("path", splits[0].getPath());
    ObjectNode element2 = om.createObjectNode();
    element2.put("ip", "127.0.0.2");
    element2.put("path", splits[1].getPath());
    splitsArray.add(element1);
    splitsArray.add(element2);
    expectedResponse.set("splits", splitsArray);
    // Checks results (expected value first, per JUnit's assertEquals convention).
    Assert.assertEquals(expectedResponse.toString(), actualResponse.toString());
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ManagedFileSplit(org.apache.hyracks.api.io.ManagedFileSplit) FileSplit(org.apache.hyracks.api.io.FileSplit) NetworkAddress(org.apache.hyracks.api.comm.NetworkAddress) NodeControllerInfo(org.apache.hyracks.api.client.NodeControllerInfo) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) ConnectorApiServlet(org.apache.asterix.api.http.server.ConnectorApiServlet) ARecordType(org.apache.asterix.om.types.ARecordType) Map(java.util.Map) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IAType(org.apache.asterix.om.types.IAType) Test(org.junit.Test) SqlppExecutionTest(org.apache.asterix.test.runtime.SqlppExecutionTest)
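
Spelled out, the expected response the test assembles corresponds to this JSON shape, where the type field holds the record type's own JSON rendering (elided here):

{
  "temp": true,
  "keys": "a1",
  "type": { ... },
  "splits": [
    { "ip": "127.0.0.1", "path": "foo1" },
    { "ip": "127.0.0.2", "path": "foo2" }
  ]
}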

Example 40 with FileSplit

Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.

From the class SplitsAndConstraintsUtil, the method getDataverseSplits:

private static FileSplit[] getDataverseSplits(String dataverseName) {
    File relPathFile = new File(dataverseName);
    List<FileSplit> splits = new ArrayList<>();
    // Get all cluster partitions and create one split per partition.
    ClusterPartition[] clusterPartitions = ClusterStateManager.INSTANCE.getClusterPartitons();
    String storageDirName = ClusterProperties.INSTANCE.getStorageDirectoryName();
    for (ClusterPartition partition : clusterPartitions) {
        // Build the dataverse directory path inside this partition's storage root.
        File f = new File(StoragePathUtil.prepareStoragePartitionPath(storageDirName, partition.getPartitionId())
                + File.separator + relPathFile);
        splits.add(StoragePathUtil.getFileSplitForClusterPartition(partition, f.getPath()));
    }
    return splits.toArray(new FileSplit[splits.size()]);
}
Also used : ArrayList(java.util.ArrayList) FileSplit(org.apache.hyracks.api.io.FileSplit) File(java.io.File) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)
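
Concretely, assuming a storage directory name of storage and the partition_<id> directory naming that StoragePathUtil applies, the splits built above for a hypothetical dataverse MyDataverse on a two-partition cluster would point at paths like storage/partition_0/MyDataverse and storage/partition_1/MyDataverse, each pinned to the node that currently owns the corresponding cluster partition.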

Aggregations

FileSplit (org.apache.hyracks.api.io.FileSplit): 63
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 43
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 42
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 40
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 39
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 39
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 39
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 38
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 33
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 33
Test (org.junit.Test): 33
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 32
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 28
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 23
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 21
ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor): 21
File (java.io.File): 18
MToNBroadcastConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNBroadcastConnectorDescriptor): 18
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 14
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 10
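
As the co-occurrence counts above suggest, FileSplit is most often wired into a Hyracks job through ConstantFileSplitProvider and FileScanOperatorDescriptor. A minimal sketch of that common pattern, with a hypothetical node name and input file (a downstream consumer such as a ResultWriterOperatorDescriptor would normally be connected before the job is run):

JobSpecification spec = new JobSpecification();
// One split: a raw host file pinned to node "nc1".
FileSplit[] splits = new FileSplit[] { new UnmanagedFileSplit("nc1", "/tmp/words.txt") };
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
// Records with a single UTF-8 string field, '|'-delimited.
RecordDescriptor recDesc = new RecordDescriptor(
        new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider,
        new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '|'),
        recDesc);
// Run the scan on the node that owns the split.
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, "nc1");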