
Example 6 with TenantAclIdDbId

Use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.

From the class ContentTrackerTest, method doTrackWithContentUpdatesContent.

@Test
@Ignore("Superseded by AlfrescoSolrTrackerTest")
public void doTrackWithContentUpdatesContent() throws Exception {
    List<TenantAclIdDbId> docs1 = new ArrayList<>();
    List<TenantAclIdDbId> docs2 = new ArrayList<>();
    List<TenantAclIdDbId> emptyList = new ArrayList<>();
    // Adds one more doc than UPDATE_BATCH
    for (int i = 0; i <= UPDATE_BATCH; i++) {
        TenantAclIdDbId doc = new TenantAclIdDbId();
        doc.dbId = 1L;
        doc.tenant = "1";
        docs1.add(doc);
    }
    TenantAclIdDbId extraDoc = docs1.get(UPDATE_BATCH);
    extraDoc.dbId = 3L;
    extraDoc.tenant = "3";
    // Adds UPDATE_BATCH docs
    for (int i = 0; i < UPDATE_BATCH; i++) {
        TenantAclIdDbId doc = new TenantAclIdDbId();
        doc.dbId = 2L;
        doc.tenant = "2";
        docs2.add(doc);
    }
    when(this.srv.getDocsWithUncleanContent(anyInt(), anyInt())).thenReturn(docs1).thenReturn(docs2).thenReturn(emptyList);
    this.contentTracker.doTrack();
    InOrder order = inOrder(srv);
    order.verify(srv).getDocsWithUncleanContent(0, READ_BATCH);
    /*
     * Each batch of calls uses different parameters to prevent Mockito from failing incorrectly:
     * otherwise it finds five matching calls instead of the first two calls, then the commit, then the rest.
     * Mockito appears to have a bug with in-order verification; see
     * https://code.google.com/p/mockito/issues/detail?id=296
     * (a standalone sketch of this workaround follows the example).
     */
    // From docs1
    order.verify(srv, times(UPDATE_BATCH)).updateContentToIndexAndCache(1L, "1");
    order.verify(srv).commit();
    // The one extra doc should be processed and then committed
    order.verify(srv).updateContentToIndexAndCache(extraDoc.dbId, extraDoc.tenant);
    order.verify(srv).commit();
    order.verify(srv).getDocsWithUncleanContent(READ_BATCH, READ_BATCH);
    // From docs2
    order.verify(srv, times(UPDATE_BATCH)).updateContentToIndexAndCache(2L, "2");
    order.verify(srv).commit();
    order.verify(srv).getDocsWithUncleanContent(2 * READ_BATCH, READ_BATCH);
}
Also used: InOrder(org.mockito.InOrder) TenantAclIdDbId(org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId) ArrayList(java.util.ArrayList) Ignore(org.junit.Ignore) Test(org.junit.Test)
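
The workaround described in the test's comment (giving each batch of stubbed calls distinct arguments so that in-order verification with times(n) stays unambiguous) can be reproduced in isolation. A minimal sketch, assuming only JUnit 4 and Mockito on the classpath; the mocked list and its calls are illustrative stand-ins:

import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;

import java.util.List;
import org.junit.Test;
import org.mockito.InOrder;

public class InOrderWorkaroundTest {

    @Test
    public void distinctArgumentsKeepInOrderVerificationUnambiguous() {
        @SuppressWarnings("unchecked")
        List<String> mock = mock(List.class);

        // Two batches with different arguments, separated by a marker call
        mock.add("batch-1");
        mock.add("batch-1");
        mock.clear();
        mock.add("batch-2");

        InOrder order = inOrder(mock);
        // Because the batches use different arguments, times(2) cannot
        // accidentally match an invocation that belongs to the second batch
        order.verify(mock, times(2)).add("batch-1");
        order.verify(mock).clear();
        order.verify(mock).add("batch-2");
    }
}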

Example 7 with TenantAclIdDbId

Use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.

From the class SolrInformationServer, method getCascadeNodes.

public List<NodeMetaData> getCascadeNodes(List<Long> txnIds) throws AuthenticationException, IOException, JSONException {
    List<FieldInstance> list = AlfrescoSolrDataModel.getInstance().getIndexedFieldNamesForProperty(ContentModel.PROP_CASCADE_TX).getFields();
    FieldInstance fieldInstance = list.get(0);
    RefCounted<SolrIndexSearcher> refCounted = null;
    IntArrayList docList = null;
    HashSet<Long> childIds = new HashSet<>();
    try {
        refCounted = core.getSearcher();
        SolrIndexSearcher searcher = refCounted.get();
        String field = fieldInstance.getField();
        SchemaField schemaField = searcher.getSchema().getField(field);
        FieldType fieldType = schemaField.getType();
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (Long l : txnIds) {
            BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
            fieldType.readableToIndexed(l.toString(), bytesRefBuilder);
            TermQuery termQuery = new TermQuery(new Term(field, bytesRefBuilder.toBytesRef()));
            BooleanClause booleanClause = new BooleanClause(termQuery, BooleanClause.Occur.SHOULD);
            builder.add(booleanClause);
        }
        BooleanQuery booleanQuery = builder.build();
        DocListCollector collector = new DocListCollector();
        searcher.search(booleanQuery, collector);
        docList = collector.getDocs();
        // System.out.println("################ CASCASDE Parent Nodes:"+docList.size());
        int size = docList.size();
        Set set = new HashSet();
        set.add(FIELD_SOLR4_ID);
        for (int i = 0; i < size; i++) {
            int docId = docList.get(i);
            Document document = searcher.doc(docId, set);
            IndexableField indexableField = document.getField(FIELD_SOLR4_ID);
            String id = indexableField.stringValue();
            TenantAclIdDbId ids = AlfrescoSolrDataModel.decodeNodeDocumentId(id);
            childIds.add(ids.dbId);
        }
    } finally {
        if (refCounted != null) {
            refCounted.decref();
        }
    }
    List<NodeMetaData> allNodeMetaDatas = new ArrayList<>();
    for (Long childId : childIds) {
        NodeMetaDataParameters nmdp = new NodeMetaDataParameters();
        nmdp.setFromNodeId(childId);
        nmdp.setToNodeId(childId);
        nmdp.setIncludeAclId(false);
        nmdp.setIncludeAspects(false);
        nmdp.setIncludeChildAssociations(false);
        nmdp.setIncludeChildIds(true);
        nmdp.setIncludeNodeRef(false);
        nmdp.setIncludeOwner(false);
        nmdp.setIncludeParentAssociations(false);
        // We only care about the path and ancestors (which is included) for this case
        nmdp.setIncludePaths(true);
        nmdp.setIncludeProperties(false);
        nmdp.setIncludeType(false);
        nmdp.setIncludeTxnId(true);
        // Gets only one
        List<NodeMetaData> nodeMetaDatas = repositoryClient.getNodesMetaData(nmdp, 1);
        allNodeMetaDatas.addAll(nodeMetaDatas);
    }
    return allNodeMetaDatas;
}
Also used: BooleanQuery(org.apache.lucene.search.BooleanQuery) Set(java.util.Set) AclChangeSet(org.alfresco.solr.client.AclChangeSet) LinkedHashSet(java.util.LinkedHashSet) IOpenBitSet(org.alfresco.solr.adapters.IOpenBitSet) HashSet(java.util.HashSet) TenantAclIdDbId(org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntArrayList(com.carrotsearch.hppc.IntArrayList) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrDocument(org.apache.solr.common.SolrDocument) NodeMetaDataParameters(org.alfresco.solr.client.NodeMetaDataParameters) FieldInstance(org.alfresco.solr.AlfrescoSolrDataModel.FieldInstance) TermQuery(org.apache.lucene.search.TermQuery) NodeMetaData(org.alfresco.solr.client.NodeMetaData) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Term(org.apache.lucene.index.Term) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) BooleanClause(org.apache.lucene.search.BooleanClause) IndexableField(org.apache.lucene.index.IndexableField)
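
The core of getCascadeNodes is the disjunction it builds over transaction ids: one SHOULD clause per id, so a document matches if it belongs to any of the given transactions. A minimal sketch of that query shape as a standalone helper; it skips the per-value readableToIndexed conversion the real method applies, so treat it as illustrative rather than drop-in:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public final class QuerySketches {

    // Builds a "field matches any of these values" disjunction, the same
    // shape getCascadeNodes builds over its transaction ids.
    public static BooleanQuery anyOf(String field, Iterable<String> values) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (String value : values) {
            builder.add(new BooleanClause(new TermQuery(new Term(field, value)),
                    BooleanClause.Occur.SHOULD));
        }
        return builder.build();
    }
}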

Example 8 with TenantAclIdDbId

Use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.

From the class SolrInformationServer, method getDocsWithUncleanContent.

@Override
public List<TenantAclIdDbId> getDocsWithUncleanContent(int start, int rows) throws IOException {
    RefCounted<SolrIndexSearcher> refCounted = null;
    try {
        List<TenantAclIdDbId> docIds = new ArrayList<>();
        refCounted = this.core.getSearcher();
        SolrIndexSearcher searcher = refCounted.get();
        /*
         * Below is the code for purging the cleanContentCache.
         * The cleanContentCache is an in-memory LRU cache of the transactions that have already
         * had their content fetched. It is needed because the ContentTracker does not have an
         * up-to-date snapshot of the index to determine which nodes are marked as dirty/new, so the
         * cache is used to filter out nodes that belong to transactions that have already been
         * processed, stopping them from being re-processed.
         *
         * The cleanContentCache needs to be purged periodically to support retrying of failed
         * content fetches: fetches for individual nodes within a transaction may have failed, but
         * the transaction will still be in the cleanContentCache, which prevents it from being
         * retried.
         *
         * Once a transaction is purged from the cleanContentCache it will be retried automatically
         * if it is marked dirty/new in the current snapshot of the index.
         *
         * The code below runs every two minutes and purges transactions from the
         * cleanContentCache that are more than 20 minutes old.
         */
        long purgeTime = System.currentTimeMillis();
        if (purgeTime - cleanContentLastPurged > 120000) {
            Iterator<Entry<Long, Long>> entries = cleanContentCache.entrySet().iterator();
            while (entries.hasNext()) {
                Entry<Long, Long> entry = entries.next();
                long txnTime = entry.getValue();
                if (purgeTime - txnTime > 1200000) {
                    // Purge clean content cache records more than 20 minutes old.
                    entries.remove();
                }
            }
            cleanContentLastPurged = purgeTime;
        }
        long txnFloor = -1;
        // This query finds the lowest txnID that has dirty content.
        TermQuery termQuery1 = new TermQuery(new Term(FIELD_FTSSTATUS, FTSStatus.Dirty.toString()));
        TermQuery termQuery2 = new TermQuery(new Term(FIELD_FTSSTATUS, FTSStatus.New.toString()));
        BooleanClause clause1 = new BooleanClause(termQuery1, BooleanClause.Occur.SHOULD);
        BooleanClause clause2 = new BooleanClause(termQuery2, BooleanClause.Occur.SHOULD);
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(clause1);
        builder.add(clause2);
        BooleanQuery orQuery = builder.build();
        Sort sort = new Sort(new SortField(FIELD_INTXID, SortField.Type.LONG));
        sort = sort.rewrite(searcher);
        TopFieldCollector collector = TopFieldCollector.create(sort, 1, null, false, false, false);
        // Filter transactions that have already been processed.
        DelegatingCollector delegatingCollector = new TxnCacheFilter(cleanContentCache);
        delegatingCollector.setLastDelegate(collector);
        searcher.search(orQuery, delegatingCollector);
        if (collector.getTotalHits() == 0) {
            return docIds;
        }
        ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
        List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
        int index = ReaderUtil.subIndex(scoreDocs[0].doc, leaves);
        LeafReaderContext context = leaves.get(index);
        NumericDocValues longs = context.reader().getNumericDocValues(FIELD_INTXID);
        txnFloor = longs.get(scoreDocs[0].doc - context.docBase);
        // System.out.println("################ Transaction floor:"+txnFloor);
        // Find the next N transactions
        collector = TopFieldCollector.create(new Sort(new SortField(FIELD_INTXID, SortField.Type.LONG)), rows, null, false, false, false);
        delegatingCollector = new TxnFloorFilter(txnFloor, cleanContentCache);
        delegatingCollector.setLastDelegate(collector);
        TermQuery txnQuery = new TermQuery(new Term(FIELD_DOC_TYPE, DOC_TYPE_TX));
        searcher.search(txnQuery, delegatingCollector);
        TopDocs docs = collector.topDocs();
        if (collector.getTotalHits() == 0) {
            // No new transactions to consider
            return docIds;
        }
        leaves = searcher.getTopReaderContext().leaves();
        FieldType fieldType = searcher.getSchema().getField(FIELD_INTXID).getType();
        builder = new BooleanQuery.Builder();
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            index = ReaderUtil.subIndex(scoreDoc.doc, leaves);
            context = leaves.get(index);
            longs = context.reader().getNumericDocValues(FIELD_INTXID);
            long txnID = longs.get(scoreDoc.doc - context.docBase);
            // Build up the query for the filter of transactions we need to pull the dirty content for.
            TermQuery txnIDQuery = new TermQuery(new Term(FIELD_INTXID, fieldType.readableToIndexed(Long.toString(txnID))));
            builder.add(new BooleanClause(txnIDQuery, BooleanClause.Occur.SHOULD));
        }
        BooleanQuery txnFilterQuery = builder.build();
        // Get the docs with dirty content for the transactions gathered above.
        TermQuery statusQuery1 = new TermQuery(new Term(FIELD_FTSSTATUS, FTSStatus.Dirty.toString()));
        TermQuery statusQuery2 = new TermQuery(new Term(FIELD_FTSSTATUS, FTSStatus.New.toString()));
        BooleanClause statusClause1 = new BooleanClause(statusQuery1, BooleanClause.Occur.SHOULD);
        BooleanClause statusClause2 = new BooleanClause(statusQuery2, BooleanClause.Occur.SHOULD);
        BooleanQuery.Builder builder1 = new BooleanQuery.Builder();
        builder1.add(statusClause1);
        builder1.add(statusClause2);
        BooleanQuery statusQuery = builder1.build();
        DocListCollector docListCollector = new DocListCollector();
        BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
        builder2.add(statusQuery, BooleanClause.Occur.MUST);
        builder2.add(new QueryWrapperFilter(txnFilterQuery), BooleanClause.Occur.MUST);
        searcher.search(builder2.build(), docListCollector);
        IntArrayList docList = docListCollector.getDocs();
        int size = docList.size();
        // System.out.println("############### Dirty Doc Count ################:" + size);
        Set<String> fields = new HashSet<>();
        fields.add(FIELD_SOLR4_ID);
        List<Long> processedTxns = new ArrayList<>();
        for (int i = 0; i < size; ++i) {
            int doc = docList.get(i);
            Document document = searcher.doc(doc, fields);
            index = ReaderUtil.subIndex(doc, leaves);
            context = leaves.get(index);
            longs = context.reader().getNumericDocValues(FIELD_INTXID);
            long txnId = longs.get(doc - context.docBase);
            if (!cleanContentCache.containsKey(txnId)) {
                processedTxns.add(txnId);
                IndexableField id = document.getField(FIELD_SOLR4_ID);
                String idString = id.stringValue();
                TenantAclIdDbId tenantAndDbId = AlfrescoSolrDataModel.decodeNodeDocumentId(idString);
                docIds.add(tenantAndDbId);
            }
        }
        long txnTime = System.currentTimeMillis();
        for (Long l : processedTxns) {
            // Record the fetch time so we know when this entry can be purged
            cleanContentCache.put(l, txnTime);
        }
        return docIds;
    } finally {
        if (refCounted != null) {
            refCounted.decref();
        }
    }
}
Also used: BooleanQuery(org.apache.lucene.search.BooleanQuery) NumericDocValues(org.apache.lucene.index.NumericDocValues) TenantAclIdDbId(org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntArrayList(com.carrotsearch.hppc.IntArrayList) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrDocument(org.apache.solr.common.SolrDocument) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) DelegatingCollector(org.apache.solr.search.DelegatingCollector) Entry(java.util.Map.Entry) Sort(org.apache.lucene.search.Sort) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) LinkedHashSet(java.util.LinkedHashSet) HashSet(java.util.HashSet) TermQuery(org.apache.lucene.search.TermQuery) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Term(org.apache.lucene.index.Term) QueryWrapperFilter(org.apache.solr.search.QueryWrapperFilter) FieldType(org.apache.solr.schema.FieldType) BooleanClause(org.apache.lucene.search.BooleanClause) IndexableField(org.apache.lucene.index.IndexableField)
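
The purge logic documented in the long comment above stands on its own. A minimal sketch of the same time-based eviction, assuming a plain concurrent map in place of the real LRU-backed cleanContentCache; the thresholds mirror the constants in the method (2-minute purge interval, 20-minute entry lifetime):

import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class CleanContentCachePurger {

    private static final long PURGE_INTERVAL_MS = 120_000;   // 2 minutes
    private static final long ENTRY_LIFETIME_MS = 1_200_000; // 20 minutes

    private final Map<Long, Long> cache = new ConcurrentHashMap<>(); // txnId -> fetch time
    private long lastPurged;

    void maybePurge() {
        long now = System.currentTimeMillis();
        if (now - lastPurged <= PURGE_INTERVAL_MS) {
            return;
        }
        Iterator<Map.Entry<Long, Long>> it = cache.entrySet().iterator();
        while (it.hasNext()) {
            // An evicted entry becomes eligible for retry on the next pass
            if (now - it.next().getValue() > ENTRY_LIFETIME_MS) {
                it.remove();
            }
        }
        lastPurged = now;
    }
}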

Example 9 with TenantAclIdDbId

Use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.

From the class AlfrescoSolrClusteringComponent, method getSolrInputDocument.

private SolrInputDocument getSolrInputDocument(Document doc, SolrQueryRequest req) throws IOException {
    try {
        String id = getFieldValueString(doc, FIELD_SOLR4_ID);
        TenantAclIdDbId tenantAndDbId = AlfrescoSolrDataModel.decodeNodeDocumentId(id);
        CoreContainer coreContainer = req.getSearcher().getCore().getCoreContainer();
        AlfrescoCoreAdminHandler coreAdminHandler = (AlfrescoCoreAdminHandler) coreContainer.getMultiCoreHandler();
        SolrInformationServer srv = (SolrInformationServer) coreAdminHandler.getInformationServers().get(req.getSearcher().getCore().getName());
        SolrContentStore solrContentStore = srv.getSolrContentStore();
        return solrContentStore.retrieveDocFromSolrContentStore(tenantAndDbId.tenant, tenantAndDbId.dbId);
    } catch (StringIndexOutOfBoundsException e) {
        // A malformed document id makes decodeNodeDocumentId fail; surface it as an IOException
        throw new IOException(e);
    }
}
Also used: SolrInputDocument(org.apache.solr.common.SolrInputDocument) TenantAclIdDbId(org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId) CoreContainer(org.apache.solr.core.CoreContainer) AlfrescoCoreAdminHandler(org.alfresco.solr.AlfrescoCoreAdminHandler) SolrContentStore(org.alfresco.solr.content.SolrContentStore) IOException(java.io.IOException) SolrInformationServer(org.alfresco.solr.SolrInformationServer)
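
The catch clause translates a malformed document id into a checked IOException rather than letting an unchecked StringIndexOutOfBoundsException escape the caller. A hypothetical helper capturing just that translation, assuming the Alfresco classes already shown in the example:

import java.io.IOException;
import org.alfresco.solr.AlfrescoSolrDataModel;
import org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId;

final class DocIdSketches {

    // decodeNodeDocumentId throws StringIndexOutOfBoundsException on ids it
    // cannot parse; translate that into a checked, descriptive IOException.
    static TenantAclIdDbId decodeOrThrow(String id) throws IOException {
        try {
            return AlfrescoSolrDataModel.decodeNodeDocumentId(id);
        } catch (StringIndexOutOfBoundsException e) {
            throw new IOException("Malformed node document id: " + id, e);
        }
    }
}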

Example 10 with TenantAclIdDbId

Use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.

From the class ContentTracker, method doTrack.

@Override
protected void doTrack() throws Exception {
    // System.out.println("############## Content Tracker doTrack()");
    try {
        long startElapsed = System.nanoTime();
        checkShutdown();
        final int ROWS = contentReadBatchSize;
        int start = 0;
        long totalDocs = 0L;
        checkShutdown();
        while (true) {
            try {
                getWriteLock().acquire();
                List<TenantAclIdDbId> docs = this.infoSrv.getDocsWithUncleanContent(start, ROWS);
                // System.out.println("####################### Unclean content: "+docs.size()+" ##############################:"+totalDocs);
                if (docs.size() == 0) {
                    break;
                }
                int docsUpdatedSinceLastCommit = 0;
                for (TenantAclIdDbId doc : docs) {
                    ContentIndexWorkerRunnable ciwr = new ContentIndexWorkerRunnable(super.threadHandler, doc, infoSrv);
                    super.threadHandler.scheduleTask(ciwr);
                    docsUpdatedSinceLastCommit++;
                    if (docsUpdatedSinceLastCommit >= contentUpdateBatchSize) {
                        super.waitForAsynchronous();
                        checkShutdown();
                        // this.infoSrv.commit();
                        long endElapsed = System.nanoTime();
                        trackerStats.addElapsedContentTime(docsUpdatedSinceLastCommit, endElapsed - startElapsed);
                        startElapsed = endElapsed;
                        docsUpdatedSinceLastCommit = 0;
                    }
                }
                if (docsUpdatedSinceLastCommit > 0) {
                    super.waitForAsynchronous();
                    checkShutdown();
                    // this.infoSrv.commit();
                    long endElapsed = System.nanoTime();
                    trackerStats.addElapsedContentTime(docsUpdatedSinceLastCommit, endElapsed - startElapsed);
                }
                totalDocs += docs.size();
                checkShutdown();
            } finally {
                getWriteLock().release();
            }
        }
        log.info("total number of docs with content updated: " + totalDocs);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used: TenantAclIdDbId(org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId) IOException(java.io.IOException)
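
Stripped of locking, shutdown checks, and stats bookkeeping, doTrack() is a paging-and-flushing loop: fetch a page of unclean docs, submit each one for indexing, flush after every contentUpdateBatchSize submissions, and stop once an empty page comes back. A minimal sketch of that skeleton with hypothetical fetchPage/submit/flush hooks (note the real loop always fetches from offset 0, apparently relying on processed transactions dropping out of the unclean result set):

import java.util.List;
import java.util.function.Consumer;
import java.util.function.IntFunction;

final class BatchDrainSketch {

    // Returns the total number of items processed.
    static <T> long drainInBatches(IntFunction<List<T>> fetchPage,
                                   Consumer<T> submit,
                                   Runnable flush,
                                   int updateBatch) {
        long total = 0;
        while (true) {
            List<T> page = fetchPage.apply(0); // always offset 0, as in doTrack()
            if (page.isEmpty()) {
                return total;
            }
            int pending = 0;
            for (T item : page) {
                submit.accept(item);
                if (++pending >= updateBatch) {
                    flush.run(); // wait for async work, record stats
                    pending = 0;
                }
            }
            if (pending > 0) {
                flush.run();
            }
            total += page.size();
        }
    }
}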

Aggregations

TenantAclIdDbId (org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId): 10 uses
SolrInputDocument (org.apache.solr.common.SolrInputDocument): 7 uses
IOException (java.io.IOException): 5 uses
ArrayList (java.util.ArrayList): 4 uses
HashSet (java.util.HashSet): 4 uses
LinkedHashSet (java.util.LinkedHashSet): 4 uses
SolrDocument (org.apache.solr.common.SolrDocument): 4 uses
IntArrayList (com.carrotsearch.hppc.IntArrayList): 3 uses
AlfrescoCoreAdminHandler (org.alfresco.solr.AlfrescoCoreAdminHandler): 3 uses
SolrInformationServer (org.alfresco.solr.SolrInformationServer): 3 uses
NodeMetaData (org.alfresco.solr.client.NodeMetaData): 3 uses
NodeMetaDataParameters (org.alfresco.solr.client.NodeMetaDataParameters): 3 uses
SolrContentStore (org.alfresco.solr.content.SolrContentStore): 3 uses
Document (org.apache.lucene.document.Document): 3 uses
IndexableField (org.apache.lucene.index.IndexableField): 3 uses
Term (org.apache.lucene.index.Term): 3 uses
BooleanClause (org.apache.lucene.search.BooleanClause): 3 uses
BooleanQuery (org.apache.lucene.search.BooleanQuery): 3 uses
TermQuery (org.apache.lucene.search.TermQuery): 3 uses
BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 3 uses