Search in sources :

Example 1 with ServiceInterruption

use of org.apache.manifoldcf.agents.interfaces.ServiceInterruption in project manifoldcf by apache.

the class AmazonCloudSearchConnector method flushDocuments.

/**
 * Sends all cached document records to Amazon CloudSearch, one chunk at a time,
 * until the chunk manager returns an empty chunk.
 *
 * For every chunk, the individual record streams are concatenated into a single
 * JSON array and posted. On a "success" status each document is activity-logged
 * with "OK" and the chunk is deleted from the cache; on any other status each
 * document is activity-logged with "FAILED" and a ManifoldCFException is thrown.
 * All records of the current chunk are closed before moving on or propagating.
 *
 *@param activities is the history activity object used to record per-document results.
 *@throws ManifoldCFException if the service reports a non-success status, or on other errors.
 *@throws ServiceInterruption if raised while posting the chunk.
 */
protected void flushDocuments(IOutputHistoryActivity activities) throws ManifoldCFException, ServiceInterruption {
    Logging.ingest.info("AmazonCloudSearch: Starting flush to Amazon");
    // Repeat until we are empty of cached stuff
    int chunkNumber = 0;
    while (true) {
        DocumentRecord[] records = documentChunkManager.readChunk(serverHost, serverPath, CHUNK_SIZE);
        try {
            if (records.length == 0)
                break;
            // The records consist of up to 1000 individual input streams, which must be all concatenated together into the post
            // To do that, we go into and out of Reader space once again...
            JSONArrayReader arrayReader = new JSONArrayReader();
            for (DocumentRecord dr : records) {
                arrayReader.addArrayElement(new JSONValueReader(new InputStreamReader(dr.getDataStream(), Consts.UTF_8)));
            }
            // post data..
            String responseBody = postData(new ReaderInputStream(arrayReader, Consts.UTF_8));
            // check status
            String status = getStatusFromJsonResponse(responseBody);
            if ("success".equals(status)) {
                // Activity-log the individual documents we sent
                for (DocumentRecord dr : records) {
                    activities.recordActivity(null, dr.getActivity(), dr.getDataSize(), dr.getUri(), "OK", null);
                }
                Logging.ingest.info("AmazonCloudSearch: Successfully sent document chunk " + chunkNumber);
                // remove documents from table..
                documentChunkManager.deleteChunk(records);
                // Advance the counter so that log messages identify each chunk distinctly.
                chunkNumber++;
            } else {
                // Activity-log the individual documents that failed
                for (DocumentRecord dr : records) {
                    activities.recordActivity(null, dr.getActivity(), dr.getDataSize(), dr.getUri(), "FAILED", responseBody);
                }
                Logging.ingest.error("AmazonCloudSearch: Error sending document chunk " + chunkNumber + ": '" + responseBody + "'");
                // NOTE(review): this exception is caught by the ManifoldCFException handler below,
                // which records a second activity entry for every document in the chunk.
                throw new ManifoldCFException("Received error status from service after feeding document.  Response body: '" + responseBody + "'");
            }
        } catch (ManifoldCFException e) {
            // Interruptions are passed straight through without activity logging.
            if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
                throw e;
            for (DocumentRecord dr : records) {
                activities.recordActivity(null, dr.getActivity(), dr.getDataSize(), dr.getUri(), e.getClass().getSimpleName().toUpperCase(Locale.ROOT), e.getMessage());
            }
            throw e;
        } catch (ServiceInterruption e) {
            for (DocumentRecord dr : records) {
                activities.recordActivity(null, dr.getActivity(), dr.getDataSize(), dr.getUri(), e.getClass().getSimpleName().toUpperCase(Locale.ROOT), e.getMessage());
            }
            throw e;
        } finally {
            // Close every record even if some fail; only the last close failure is kept.
            // Throwing from here supersedes any exception already propagating from the try block.
            Throwable exception = null;
            for (DocumentRecord dr : records) {
                try {
                    dr.close();
                } catch (Throwable e) {
                    exception = e;
                }
            }
            if (exception != null) {
                if (exception instanceof ManifoldCFException)
                    throw (ManifoldCFException) exception;
                else if (exception instanceof Error)
                    throw (Error) exception;
                else if (exception instanceof RuntimeException)
                    throw (RuntimeException) exception;
                else
                    throw new RuntimeException("Unknown exception class thrown: " + exception.getClass().getName() + ": " + exception.getMessage(), exception);
            }
        }
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) ServiceInterruption(org.apache.manifoldcf.agents.interfaces.ServiceInterruption) ReaderInputStream(org.apache.commons.io.input.ReaderInputStream) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException)

Example 2 with ServiceInterruption

use of org.apache.manifoldcf.agents.interfaces.ServiceInterruption in project manifoldcf by apache.

the class AmazonCloudSearchConnector method check.

/**
 * Probe the connection by posting an empty JSON array and report the outcome
 * as a human-readable string.
 *
 * A response status of "error" is reported as a working connection, presumably
 * because the empty batch is expected to be rejected by a reachable service
 * (NOTE(review): confirm against the service behavior). A ServiceInterruption
 * is reported as a transient problem rather than propagated.
 *@return the connection's status as a displayable string.
 */
@Override
public String check() throws ManifoldCFException {
    try {
        getSession();
        final String reply = postData(new ReaderInputStream(new StringReader("[]"), Consts.UTF_8));
        final String replyStatus;
        try {
            replyStatus = getStatusFromJsonResponse(reply);
        } catch (ManifoldCFException parseFailure) {
            // The body could not be interpreted; surface a hint rather than failing the check.
            Logging.ingest.debug(parseFailure);
            return "Could not get status from response body. Check Access Policy setting of your domain of Amazon CloudSearch.: " + parseFailure.getMessage();
        }
        final String verdict = "error".equalsIgnoreCase(replyStatus)
            ? "Connection working. responsbody : "
            : "Connection NOT working. responsbody : ";
        return verdict + reply;
    } catch (ServiceInterruption interruption) {
        Logging.ingest.debug(interruption);
        return "Transient exception: " + interruption.getMessage();
    }
}
Also used : ServiceInterruption(org.apache.manifoldcf.agents.interfaces.ServiceInterruption) ReaderInputStream(org.apache.commons.io.input.ReaderInputStream) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException) StringReader(java.io.StringReader)

Example 3 with ServiceInterruption

use of org.apache.manifoldcf.agents.interfaces.ServiceInterruption in project manifoldcf by apache.

the class CmisRepositoryConnector method getSession.

/**
 * Set up a session.
 *
 * When no session exists yet, the required connection parameters are validated
 * and a GetSessionThread is started and joined; any exception captured by that
 * thread is translated here. The last-session-fetch timestamp is refreshed on
 * every call, whether or not a new session had to be created.
 *
 *@throws ManifoldCFException if a required parameter is missing, the URL is malformed,
 * the CMIS connection or credentials are rejected, or the calling thread is interrupted
 * (error code INTERRUPTED).
 *@throws ServiceInterruption for NotBoundException / RemoteException failures, with a
 * retry time 60 seconds in the future.
 */
protected void getSession() throws ManifoldCFException, ServiceInterruption {
    if (session == null) {
        if (StringUtils.isEmpty(binding))
            throw new ManifoldCFException("Parameter " + CmisConfig.BINDING_PARAM + " required but not set");
        if (StringUtils.isEmpty(username))
            throw new ManifoldCFException("Parameter " + CmisConfig.USERNAME_PARAM + " required but not set");
        if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("CMIS: Username = '" + username + "'");
        if (StringUtils.isEmpty(password))
            throw new ManifoldCFException("Parameter " + CmisConfig.PASSWORD_PARAM + " required but not set");
        // Only the presence of the password is logged, never its value.
        Logging.connectors.debug("CMIS: Password exists");
        if (StringUtils.isEmpty(protocol))
            throw new ManifoldCFException("Parameter " + CmisConfig.PROTOCOL_PARAM + " required but not set");
        if (StringUtils.isEmpty(server))
            throw new ManifoldCFException("Parameter " + CmisConfig.SERVER_PARAM + " required but not set");
        if (StringUtils.isEmpty(port))
            throw new ManifoldCFException("Parameter " + CmisConfig.PORT_PARAM + " required but not set");
        if (StringUtils.isEmpty(path))
            throw new ManifoldCFException("Parameter " + CmisConfig.PATH_PARAM + " required but not set");
        long currentTime;
        GetSessionThread t = new GetSessionThread();
        try {
            t.start();
            t.join();
            Throwable thr = t.getException();
            if (thr != null) {
                if (thr instanceof java.net.MalformedURLException)
                    throw (java.net.MalformedURLException) thr;
                else if (thr instanceof NotBoundException)
                    throw (NotBoundException) thr;
                else if (thr instanceof RemoteException)
                    throw (RemoteException) thr;
                else if (thr instanceof CmisConnectionException)
                    throw new ManifoldCFException("CMIS: Error during getting a new session: " + thr.getMessage(), thr);
                else if (thr instanceof CmisPermissionDeniedException)
                    throw new ManifoldCFException("CMIS: Wrong credentials during getting a new session: " + thr.getMessage(), thr);
                else if (thr instanceof Error)
                    throw (Error) thr;
                else if (thr instanceof RuntimeException)
                    // Rethrow as-is; a blind cast to Error would replace it with a ClassCastException.
                    throw (RuntimeException) thr;
                else
                    throw new RuntimeException("Unknown exception class thrown: " + thr.getClass().getName() + ": " + thr.getMessage(), thr);
            }
        } catch (InterruptedException e) {
            t.interrupt();
            throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
        } catch (java.net.MalformedURLException e) {
            throw new ManifoldCFException(e.getMessage(), e);
        } catch (NotBoundException e) {
            // Transient problem: Server not available at the moment.
            Logging.connectors.warn("CMIS: Server not up at the moment: " + e.getMessage(), e);
            currentTime = System.currentTimeMillis();
            throw new ServiceInterruption(e.getMessage(), currentTime + 60000L);
        } catch (RemoteException e) {
            Throwable e2 = e.getCause();
            // An interruption wrapped in a remote exception is still an interruption.
            if (e2 instanceof InterruptedException || e2 instanceof InterruptedIOException)
                throw new ManifoldCFException(e2.getMessage(), e2, ManifoldCFException.INTERRUPTED);
            // Treat this as a transient problem
            Logging.connectors.warn("CMIS: Transient remote exception creating session: " + e.getMessage(), e);
            currentTime = System.currentTimeMillis();
            throw new ServiceInterruption(e.getMessage(), currentTime + 60000L);
        }
    }
    lastSessionFetch = System.currentTimeMillis();
}
Also used : InterruptedIOException(java.io.InterruptedIOException) ServiceInterruption(org.apache.manifoldcf.agents.interfaces.ServiceInterruption) NotBoundException(java.rmi.NotBoundException) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException) CmisConnectionException(org.apache.chemistry.opencmis.commons.exceptions.CmisConnectionException) CmisPermissionDeniedException(org.apache.chemistry.opencmis.commons.exceptions.CmisPermissionDeniedException) RemoteException(java.rmi.RemoteException)

Example 4 with ServiceInterruption

use of org.apache.manifoldcf.agents.interfaces.ServiceInterruption in project manifoldcf by apache.

the class CmisRepositoryConnector method checkConnection.

/**
 * Verify that the current session is usable, re-establishing it once if needed.
 *
 * Each pass obtains a session via getSession(), then starts and joins a
 * CheckConnectionThread. If the check fails with a RemoteException on a session
 * that already existed before this pass, the session is discarded and the loop
 * retries with a fresh one; if the session was newly created in this pass, the
 * failure is reported as a ServiceInterruption instead.
 *
 *@throws ManifoldCFException on a CMIS connection error or when interrupted
 * (error code INTERRUPTED).
 *@throws ServiceInterruption when a freshly created session fails the check, with a
 * retry time 60 seconds in the future.
 */
protected void checkConnection() throws ManifoldCFException, ServiceInterruption {
    while (true) {
        // Remember whether this pass had to create the session; that decides retry vs. give up below.
        boolean noSession = (session == null);
        getSession();
        long currentTime;
        CheckConnectionThread t = new CheckConnectionThread();
        try {
            t.start();
            t.join();
            Throwable thr = t.getException();
            if (thr != null) {
                if (thr instanceof RemoteException)
                    throw (RemoteException) thr;
                else if (thr instanceof CmisConnectionException)
                    throw new ManifoldCFException("CMIS: Error during checking connection: " + thr.getMessage(), thr);
                else if (thr instanceof Error)
                    throw (Error) thr;
                else if (thr instanceof RuntimeException)
                    // Rethrow as-is; a blind cast to Error would replace it with a ClassCastException.
                    throw (RuntimeException) thr;
                else
                    throw new RuntimeException("Unknown exception class thrown: " + thr.getClass().getName() + ": " + thr.getMessage(), thr);
            }
            return;
        } catch (InterruptedException e) {
            t.interrupt();
            throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
        } catch (RemoteException e) {
            Throwable e2 = e.getCause();
            if (e2 instanceof InterruptedException || e2 instanceof InterruptedIOException)
                throw new ManifoldCFException(e2.getMessage(), e2, ManifoldCFException.INTERRUPTED);
            if (noSession) {
                // The session was just created and still fails: report as transient rather than looping.
                currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Transient error connecting to CMIS service: " + e.getMessage(), currentTime + 60000L);
            }
            // The pre-existing session may be stale: drop it and retry with a new one.
            session = null;
            lastSessionFetch = -1L;
            continue;
        }
    }
}
Also used : ServiceInterruption(org.apache.manifoldcf.agents.interfaces.ServiceInterruption) InterruptedIOException(java.io.InterruptedIOException) CmisConnectionException(org.apache.chemistry.opencmis.commons.exceptions.CmisConnectionException) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException) RemoteException(java.rmi.RemoteException)

Example 5 with ServiceInterruption

use of org.apache.manifoldcf.agents.interfaces.ServiceInterruption in project manifoldcf by apache.

the class SPSProxyHelper method getFieldValues.

/**
 * Gets a list of field values of the given document
 * @param fieldNames the names of the fields to fetch
 * @param site the site path; "/" is treated as the root and mapped to the empty string
 * @param docLibrary the identifier of the document library (list) to query
 * @param docId the document path used to match the "FileRef" field
 * @param dspStsWorks true to query through the StsAdapter (dsp) service, false to use
 *        the Lists service instead
 * @return a map of field name to field value; possibly empty when no item is found.
 *         Returns null when the response XML cannot be parsed, when the service answers
 *         with HTTP 404, 302 or 401, or when the fault message contains
 *         "List does not exist".
 * @throws ManifoldCFException on malformed responses, SOAP/URL errors, HTTP 403 or other
 *         unexpected HTTP codes, unexpected remote exceptions, or interruption
 * @throws ServiceInterruption on service exceptions and otherwise unclassified Axis faults,
 *         so that the request is retried later
 */
public Map<String, String> getFieldValues(String[] fieldNames, String site, String docLibrary, String docId, boolean dspStsWorks) throws ManifoldCFException, ServiceInterruption {
    if (Logging.connectors.isDebugEnabled())
        // Arrays.toString prints the actual names; concatenating the array itself would only print its identity hash.
        Logging.connectors.debug("SharePoint: In getFieldValues; fieldNames=" + java.util.Arrays.toString(fieldNames) + ", site='" + site + "', docLibrary='" + docLibrary + "', docId='" + docId + "', dspStsWorks=" + dspStsWorks);
    long currentTime;
    try {
        HashMap<String, String> result = new HashMap<String, String>();
        // root case
        if (site.compareTo("/") == 0)
            site = "";
        if (dspStsWorks) {
            StsAdapterWS listService = new StsAdapterWS(baseUrl + site, userName, password, configuration, httpClient);
            StsAdapterSoapStub stub = (StsAdapterSoapStub) listService.getStsAdapterSoapHandler();
            String[] vArray = new String[1];
            vArray[0] = "1.0";
            VersionsHeader myVersion = new VersionsHeader();
            myVersion.setVersion(vArray);
            stub.setHeader("http://schemas.microsoft.com/sharepoint/dsp", "versions", myVersion);
            RequestHeader reqHeader = new RequestHeader();
            reqHeader.setDocument(DocumentType.content);
            reqHeader.setMethod(MethodType.query);
            stub.setHeader("http://schemas.microsoft.com/sharepoint/dsp", "request", reqHeader);
            QueryRequest myRequest = new QueryRequest();
            DSQuery sQuery = new DSQuery();
            sQuery.setSelect("/list[@id='" + docLibrary + "']");
            sQuery.setResultContent(ResultContentType.dataOnly);
            myRequest.setDsQuery(sQuery);
            DspQuery spQuery = new DspQuery();
            spQuery.setRowLimit(1);
            // For the Requested Fields
            if (fieldNames.length > 0) {
                Fields spFields = new Fields();
                ArrayList<Field> fields = new ArrayList<Field>();
                for (String fieldName : fieldNames) {
                    Field spField = new Field();
                    spField.setName(fieldName);
                    spField.setAlias(fieldName);
                    fields.add(spField);
                }
                spFields.setField(fields.toArray(new Field[0]));
                spQuery.setFields(spFields);
            }
            // Of this document
            DspQueryWhere spWhere = new DspQueryWhere();
            org.apache.axis.message.MessageElement criterion = new org.apache.axis.message.MessageElement((String) null, "Contains");
            SOAPElement seFieldRef = criterion.addChildElement("FieldRef");
            seFieldRef.addAttribute(SOAPFactory.newInstance().createName("Name"), "FileRef");
            SOAPElement seValue = criterion.addChildElement("Value");
            seValue.addAttribute(SOAPFactory.newInstance().createName("Type"), "String");
            seValue.setValue(docId);
            org.apache.axis.message.MessageElement[] criteria = { criterion };
            spWhere.set_any(criteria);
            spQuery.setWhere(spWhere);
            // Set Criteria
            myRequest.getDsQuery().setQuery(spQuery);
            StsAdapterSoap call = stub;
            // Make Request
            QueryResponse resp = call.query(myRequest);
            org.apache.axis.message.MessageElement[] list = resp.get_any();
            final String xmlResponse = list[0].toString();
            if (Logging.connectors.isDebugEnabled()) {
                Logging.connectors.debug("SharePoint: getFieldValues xml response: '" + xmlResponse + "'");
            }
            final XMLDoc doc;
            try {
                doc = new XMLDoc(xmlResponse);
            } catch (ManifoldCFException e) {
                // Unparseable response: signal "no values available" to the caller.
                return null;
            }
            ArrayList nodeList = new ArrayList();
            doc.processPath(nodeList, "*", null);
            if (nodeList.size() != 1) {
                throw new ManifoldCFException("Bad xml - missing outer 'ns1:dsQueryResponse' node - there are " + Integer.toString(nodeList.size()) + " nodes");
            }
            Object parent = nodeList.get(0);
            if (!doc.getNodeName(parent).equals("ns1:dsQueryResponse"))
                throw new ManifoldCFException("Bad xml - outer node is not 'ns1:dsQueryResponse'");
            nodeList.clear();
            doc.processPath(nodeList, "*", parent);
            // <Shared_X0020_Documents />
            parent = nodeList.get(0);
            nodeList.clear();
            doc.processPath(nodeList, "*", parent);
            // Get each childs Value and add to return array
            for (int i = 0; i < nodeList.size(); i++) {
                Object documentNode = nodeList.get(i);
                ArrayList fieldList = new ArrayList();
                doc.processPath(fieldList, "*", documentNode);
                for (int j = 0; j < fieldList.size(); j++) {
                    Object field = fieldList.get(j);
                    String fieldData = doc.getData(field);
                    String fieldName = doc.getNodeName(field);
                    // Right now this really only works right for single-valued fields.  For multi-valued
                    // fields, we'd need to know in advance that they were multivalued
                    // so that we could interpret commas as value separators.
                    result.put(fieldName, fieldData);
                }
            }
        } else {
            // SharePoint 2010: Get field values some other way
            // Sharepoint 2010; use Lists service instead
            ListsWS lservice = new ListsWS(baseUrl + site, userName, password, configuration, httpClient);
            ListsSoapStub stub1 = (ListsSoapStub) lservice.getListsSoapHandler();
            String sitePlusDocId = serverLocation + site + docId;
            if (sitePlusDocId.startsWith("/"))
                sitePlusDocId = sitePlusDocId.substring(1);
            GetListItemsQuery q = buildMatchQuery("FileRef", "Text", sitePlusDocId);
            GetListItemsViewFields viewFields = buildViewFields(fieldNames);
            GetListItemsResponseGetListItemsResult items = stub1.getListItems(docLibrary, "", q, viewFields, "1", buildNonPagingQueryOptions(), null);
            if (items == null)
                return result;
            MessageElement[] list = items.get_any();
            final String xmlResponse = list[0].toString();
            if (Logging.connectors.isDebugEnabled()) {
                Logging.connectors.debug("SharePoint: getListItems FileRef value '" + sitePlusDocId + "', xml response: '" + xmlResponse + "'");
            }
            ArrayList nodeList = new ArrayList();
            final XMLDoc doc;
            try {
                doc = new XMLDoc(xmlResponse);
            } catch (ManifoldCFException e) {
                // Unparseable response: signal "no values available" to the caller.
                return null;
            }
            doc.processPath(nodeList, "*", null);
            if (nodeList.size() != 1)
                throw new ManifoldCFException("Bad xml - expecting one outer 'ns1:listitems' node - there are " + Integer.toString(nodeList.size()) + " nodes");
            Object parent = nodeList.get(0);
            if (!"ns1:listitems".equals(doc.getNodeName(parent)))
                throw new ManifoldCFException("Bad xml - outer node is not 'ns1:listitems'");
            nodeList.clear();
            doc.processPath(nodeList, "*", parent);
            if (nodeList.size() != 1)
                throw new ManifoldCFException("Expected rsdata result but no results found.");
            Object rsData = nodeList.get(0);
            int itemCount = Integer.parseInt(doc.getValue(rsData, "ItemCount"));
            if (itemCount == 0)
                return result;
            // Now, extract the files from the response document
            ArrayList nodeDocs = new ArrayList();
            doc.processPath(nodeDocs, "*", rsData);
            if (nodeDocs.size() != itemCount)
                throw new ManifoldCFException("itemCount does not match with nodeDocs.size()");
            if (itemCount != 1)
                throw new ManifoldCFException("Expecting only one item, instead saw '" + itemCount + "'");
            Object o = nodeDocs.get(0);
            // Look for all the specified attributes in the record
            for (String attrName : fieldNames) {
                String attrValue = doc.getValue(o, "ows_" + attrName);
                if (attrValue != null) {
                    result.put(attrName, valueMunge(attrValue));
                }
            }
        }
        return result;
    } catch (javax.xml.soap.SOAPException e) {
        throw new ManifoldCFException("Soap exception: " + e.getMessage(), e);
    } catch (java.net.MalformedURLException e) {
        throw new ManifoldCFException("Bad SharePoint url: " + e.getMessage(), e);
    } catch (javax.xml.rpc.ServiceException e) {
        if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("SharePoint: Got a service exception getting field values for site " + site + " library " + docLibrary + " document '" + docId + "' - retrying", e);
        currentTime = System.currentTimeMillis();
        throw new ServiceInterruption("Service exception: " + e.getMessage(), e, currentTime + 300000L, currentTime + 12 * 60 * 60000L, -1, true);
    } catch (org.apache.axis.AxisFault e) {
        // Bad XML can come from Microsoft.
        if (e.getCause() != null && (e.getCause() instanceof org.xml.sax.SAXParseException)) {
            return null;
        }
        if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HTTP"))) {
            org.w3c.dom.Element elem = e.lookupFaultDetail(new javax.xml.namespace.QName("http://xml.apache.org/axis/", "HttpErrorCode"));
            if (elem != null) {
                elem.normalize();
                String httpErrorCode = elem.getFirstChild().getNodeValue().trim();
                // 302 is what SharePoint returns for external sites
                if (httpErrorCode.equals("404") || httpErrorCode.equals("302"))
                    return null;
                else if (httpErrorCode.equals("403"))
                    throw new ManifoldCFException("Remote procedure exception: " + e.getMessage(), e);
                else if (httpErrorCode.equals("401")) {
                    if (Logging.connectors.isDebugEnabled())
                        Logging.connectors.debug("SharePoint: Crawl user does not have sufficient privileges to get field values for site " + site + " library " + docLibrary + " - skipping", e);
                    return null;
                }
                throw new ManifoldCFException("Unexpected http error code " + httpErrorCode + " accessing SharePoint at " + baseUrl + site + ": " + e.getMessage(), e);
            }
            throw new ManifoldCFException("Unknown http error occurred: " + e.getMessage(), e);
        }
        if (e.getFaultCode().equals(new javax.xml.namespace.QName("http://schemas.xmlsoap.org/soap/envelope/", "Server.userException"))) {
            String exceptionName = e.getFaultString();
            if (exceptionName.equals("java.lang.InterruptedException"))
                throw new ManifoldCFException("Interrupted", ManifoldCFException.INTERRUPTED);
        }
        // I don't know if this is what you get when the library is missing, but here's hoping.
        if (e.getMessage().indexOf("List does not exist") != -1)
            return null;
        if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("SharePoint: Got a remote exception getting field values for site " + site + " library " + docLibrary + " document [" + docId + "] - retrying", e);
        currentTime = System.currentTimeMillis();
        throw new ServiceInterruption("Remote procedure exception: " + e.getMessage(), e, currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
    } catch (java.rmi.RemoteException e) {
        throw new ManifoldCFException("Unexpected remote exception occurred: " + e.getMessage(), e);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MessageElement(org.apache.axis.message.MessageElement) MessageElement(org.apache.axis.message.MessageElement) javax.xml.soap(javax.xml.soap) com.microsoft.schemas.sharepoint.soap(com.microsoft.schemas.sharepoint.soap) MessageElement(org.apache.axis.message.MessageElement) XMLDoc(org.apache.manifoldcf.core.common.XMLDoc) ServiceInterruption(org.apache.manifoldcf.agents.interfaces.ServiceInterruption) QName(javax.xml.namespace.QName) ManifoldCFException(org.apache.manifoldcf.core.interfaces.ManifoldCFException)

Aggregations

ServiceInterruption (org.apache.manifoldcf.agents.interfaces.ServiceInterruption)33 ManifoldCFException (org.apache.manifoldcf.core.interfaces.ManifoldCFException)31 ArrayList (java.util.ArrayList)15 InterruptedIOException (java.io.InterruptedIOException)13 QName (javax.xml.namespace.QName)13 MessageElement (org.apache.axis.message.MessageElement)13 XMLDoc (org.apache.manifoldcf.core.common.XMLDoc)12 IOException (java.io.IOException)6 RemoteException (java.rmi.RemoteException)6 CmisConnectionException (org.apache.chemistry.opencmis.commons.exceptions.CmisConnectionException)4 ConnectException (java.net.ConnectException)3 MalformedURLException (java.net.MalformedURLException)3 HashMap (java.util.HashMap)3 MongoException (com.mongodb.MongoException)2 FileInputStream (java.io.FileInputStream)2 FileOutputStream (java.io.FileOutputStream)2 InputStream (java.io.InputStream)2 NotBoundException (java.rmi.NotBoundException)2 HashSet (java.util.HashSet)2 SmbException (jcifs.smb.SmbException)2