use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.
the class ContentTrackerTest method doTrackWithContentUpdatesContent.
@Test
@Ignore("Superseded by AlfrescoSolrTrackerTest")
public void doTrackWithContentUpdatesContent() throws Exception {
List<TenantAclIdDbId> docs1 = new ArrayList<>();
List<TenantAclIdDbId> docs2 = new ArrayList<>();
List<TenantAclIdDbId> emptyList = new ArrayList<>();
// Adds one more than the UPDATE_BATCH
for (int i = 0; i <= UPDATE_BATCH; i++) {
TenantAclIdDbId doc = new TenantAclIdDbId();
doc.dbId = 1L;
doc.tenant = "1";
docs1.add(doc);
}
TenantAclIdDbId thirdDoc = docs1.get(UPDATE_BATCH);
thirdDoc.dbId = 3L;
thirdDoc.tenant = "3";
// Adds UPDATE_BATCH
for (long i = 0; i < UPDATE_BATCH; i++) {
TenantAclIdDbId doc = new TenantAclIdDbId();
doc.dbId = 2L;
doc.tenant = "2";
docs2.add(doc);
}
when(this.srv.getDocsWithUncleanContent(anyInt(), anyInt())).thenReturn(docs1).thenReturn(docs2).thenReturn(emptyList);
this.contentTracker.doTrack();
InOrder order = inOrder(srv);
order.verify(srv).getDocsWithUncleanContent(0, READ_BATCH);
/*
* I had to make each bunch of calls have different parameters to prevent Mockito from incorrectly failing
* because it was finding 5 calls instead of finding the first two calls, then the commit, then the rest...
* It seems that Mockito has a bug with verification in order.
* See https://code.google.com/p/mockito/issues/detail?id=296
*/
// From docs1
order.verify(srv, times(UPDATE_BATCH)).updateContentToIndexAndCache(1L, "1");
order.verify(srv).commit();
// The one extra doc should be processed and then committed
order.verify(srv).updateContentToIndexAndCache(thirdDoc.dbId, thirdDoc.tenant);
order.verify(srv).commit();
order.verify(srv).getDocsWithUncleanContent(0 + READ_BATCH, READ_BATCH);
// From docs2
order.verify(srv, times(UPDATE_BATCH)).updateContentToIndexAndCache(2L, "2");
order.verify(srv).commit();
order.verify(srv).getDocsWithUncleanContent(0 + READ_BATCH + READ_BATCH, READ_BATCH);
}
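The in-order verification trick referenced in the comment above can be shown in isolation. Below is a minimal, self-contained sketch of the same pattern: verify a batch of identical calls with times(), then a commit, then the next batch. The Service interface and the class name are hypothetical stand-ins for SolrInformationServer; only the Mockito calls (inOrder, verify, times) are the ones the test itself uses.
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;

import org.junit.Test;
import org.mockito.InOrder;

public class InOrderBatchVerificationSketch {

    // Hypothetical collaborator standing in for SolrInformationServer.
    interface Service {
        void updateContentToIndexAndCache(long dbId, String tenant);
        void commit();
    }

    @Test
    public void verifiesBatchThenCommitInOrder() {
        Service srv = mock(Service.class);

        // Exercise: a batch of identical calls, a commit, then one extra call and a final commit.
        srv.updateContentToIndexAndCache(1L, "1");
        srv.updateContentToIndexAndCache(1L, "1");
        srv.commit();
        srv.updateContentToIndexAndCache(3L, "3");
        srv.commit();

        // Giving each batch distinct arguments keeps the in-order matching unambiguous,
        // which is the workaround the original test comment describes.
        InOrder order = inOrder(srv);
        order.verify(srv, times(2)).updateContentToIndexAndCache(1L, "1");
        order.verify(srv).commit();
        order.verify(srv).updateContentToIndexAndCache(3L, "3");
        order.verify(srv).commit();
    }
}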
use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.
the class SolrInformationServer method getCascadeNodes.
public List<NodeMetaData> getCascadeNodes(List<Long> txnIds) throws AuthenticationException, IOException, JSONException {
List<FieldInstance> list = AlfrescoSolrDataModel.getInstance().getIndexedFieldNamesForProperty(ContentModel.PROP_CASCADE_TX).getFields();
FieldInstance fieldInstance = list.get(0);
RefCounted<SolrIndexSearcher> refCounted = null;
IntArrayList docList = null;
HashSet<Long> childIds = new HashSet<>();
try {
refCounted = core.getSearcher();
SolrIndexSearcher searcher = refCounted.get();
String field = fieldInstance.getField();
SchemaField schemaField = searcher.getSchema().getField(field);
FieldType fieldType = schemaField.getType();
BooleanQuery.Builder builder = new BooleanQuery.Builder();
BooleanQuery booleanQuery = null;
for (Long l : txnIds) {
BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
fieldType.readableToIndexed(l.toString(), bytesRefBuilder);
TermQuery termQuery = new TermQuery(new Term(field, bytesRefBuilder.toBytesRef()));
BooleanClause booleanClause = new BooleanClause(termQuery, BooleanClause.Occur.SHOULD);
builder.add(booleanClause);
}
booleanQuery = builder.build();
DocListCollector collector = new DocListCollector();
searcher.search(booleanQuery, collector);
docList = collector.getDocs();
// System.out.println("################ CASCASDE Parent Nodes:"+docList.size());
int size = docList.size();
Set<String> set = new HashSet<>();
set.add(FIELD_SOLR4_ID);
for (int i = 0; i < size; i++) {
int docId = docList.get(i);
Document document = searcher.doc(docId, set);
IndexableField indexableField = document.getField(FIELD_SOLR4_ID);
String id = indexableField.stringValue();
TenantAclIdDbId ids = AlfrescoSolrDataModel.decodeNodeDocumentId(id);
// System.out.println("################## Cascade Parent:"+ ids.dbId);
childIds.add(ids.dbId);
}
} finally {
refCounted.decref();
}
List<NodeMetaData> allNodeMetaDatas = new ArrayList<>();
for (Long childId : childIds) {
NodeMetaDataParameters nmdp = new NodeMetaDataParameters();
nmdp.setFromNodeId(childId);
nmdp.setToNodeId(childId);
nmdp.setIncludeAclId(false);
nmdp.setIncludeAspects(false);
nmdp.setIncludeChildAssociations(false);
nmdp.setIncludeChildIds(true);
nmdp.setIncludeNodeRef(false);
nmdp.setIncludeOwner(false);
nmdp.setIncludeParentAssociations(false);
// We only care about the path and ancestors (which are included) for this case
nmdp.setIncludePaths(true);
nmdp.setIncludeProperties(false);
nmdp.setIncludeType(false);
nmdp.setIncludeTxnId(true);
// Gets only one
List<NodeMetaData> nodeMetaDatas = repositoryClient.getNodesMetaData(nmdp, 1);
allNodeMetaDatas.addAll(nodeMetaDatas);
}
return allNodeMetaDatas;
}
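The core of getCascadeNodes is an OR-of-terms query, one TermQuery per transaction id, collected against the current SolrIndexSearcher. A stripped-down sketch of that pattern in plain Lucene is shown below; it assumes the term values are already in indexed (string) form, whereas the production code converts them with FieldType.readableToIndexed first, and the class and method names are illustrative only.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

final class TxnOrQuerySketch {

    // Builds "field:id1 OR field:id2 OR ..." for the given (already indexed) term values.
    static BooleanQuery buildOrQuery(String field, List<String> indexedTxnIds) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (String indexedTxnId : indexedTxnIds) {
            builder.add(new TermQuery(new Term(field, indexedTxnId)), BooleanClause.Occur.SHOULD);
        }
        return builder.build();
    }

    // Runs the query and returns the stored id field of every hit, up to maxHits.
    static List<String> collectIds(IndexSearcher searcher, BooleanQuery query, String idField, int maxHits) throws IOException {
        List<String> ids = new ArrayList<>();
        TopDocs topDocs = searcher.search(query, maxHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = searcher.doc(scoreDoc.doc);
            ids.add(document.get(idField));
        }
        return ids;
    }
}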
use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.
the class SolrInformationServer method getDocsWithUncleanContent.
@Override
public List<TenantAclIdDbId> getDocsWithUncleanContent(int start, int rows) throws IOException {
RefCounted<SolrIndexSearcher> refCounted = null;
try {
List<TenantAclIdDbId> docIds = new ArrayList<>();
refCounted = this.core.getSearcher();
SolrIndexSearcher searcher = refCounted.get();
/*
* Below is the code for purging the cleanContentCache.
* The cleanContentCache is an in-memory LRU cache of the transactions that have already
* had their content fetched. This is needed because the ContentTracker does not have an up-to-date
* snapshot of the index to determine which nodes are marked as dirty/new. The cleanContentCache is used
* to filter out nodes that belong to transactions that have already been processed, which stops them from
* being re-processed.
*
* The cleanContentCache needs to be purged periodically to support retrying of failed content fetches.
* This is because fetches for individual nodes within the transaction may have failed, but the transaction will still be in the
* cleanContentCache, which prevents it from being retried.
*
* Once a transaction is purged from the cleanContentCache it will be retried automatically if it is marked dirty/new
* in the current snapshot of the index.
*
* The code below runs every two minutes and purges transactions from the
* cleanContentCache that are more than 20 minutes old.
*
*/
long purgeTime = System.currentTimeMillis();
if (purgeTime - cleanContentLastPurged > 120000) {
Iterator<Entry<Long, Long>> entries = cleanContentCache.entrySet().iterator();
while (entries.hasNext()) {
Entry<Long, Long> entry = entries.next();
long txnTime = entry.getValue();
if (purgeTime - txnTime > 1200000) {
// Purge the clean content cache of records more than 20 minutes old.
entries.remove();
}
}
cleanContentLastPurged = purgeTime;
}
long txnFloor = -1;
// This query gets the lowest txnID that has dirty content.
// System.out.println("############### finding the transaction floor ################");
TermQuery termQuery1 = new TermQuery(new Term(FIELD_FTSSTATUS, FTSStatus.Dirty.toString()));
TermQuery termQuery2 = new TermQuery(new Term(FIELD_FTSSTATUS, FTSStatus.New.toString()));
BooleanClause clause1 = new BooleanClause(termQuery1, BooleanClause.Occur.SHOULD);
BooleanClause clause2 = new BooleanClause(termQuery2, BooleanClause.Occur.SHOULD);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(clause1);
builder.add(clause2);
BooleanQuery orQuery = builder.build();
Sort sort = new Sort(new SortField(FIELD_INTXID, SortField.Type.LONG));
sort = sort.rewrite(searcher);
TopFieldCollector collector = TopFieldCollector.create(sort, 1, null, false, false, false);
// Filter transactions that have already been processed.
DelegatingCollector delegatingCollector = new TxnCacheFilter(cleanContentCache);
delegatingCollector.setLastDelegate(collector);
searcher.search(orQuery, delegatingCollector);
if (collector.getTotalHits() == 0) {
return docIds;
}
ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
int index = ReaderUtil.subIndex(scoreDocs[0].doc, leaves);
LeafReaderContext context = leaves.get(index);
NumericDocValues longs = context.reader().getNumericDocValues(FIELD_INTXID);
txnFloor = longs.get(scoreDocs[0].doc - context.docBase);
// System.out.println("################ Transaction floor:"+txnFloor);
// Find the next N transactions
collector = TopFieldCollector.create(new Sort(new SortField(FIELD_INTXID, SortField.Type.LONG)), rows, null, false, false, false);
delegatingCollector = new TxnFloorFilter(txnFloor, cleanContentCache);
delegatingCollector.setLastDelegate(collector);
TermQuery txnQuery = new TermQuery(new Term(FIELD_DOC_TYPE, DOC_TYPE_TX));
searcher.search(txnQuery, delegatingCollector);
TopDocs docs = collector.topDocs();
if (collector.getTotalHits() == 0) {
// No new transactions to consider
return docIds;
}
leaves = searcher.getTopReaderContext().leaves();
FieldType fieldType = searcher.getSchema().getField(FIELD_INTXID).getType();
builder = new BooleanQuery.Builder();
for (ScoreDoc scoreDoc : docs.scoreDocs) {
index = ReaderUtil.subIndex(scoreDoc.doc, leaves);
context = leaves.get(index);
longs = context.reader().getNumericDocValues(FIELD_INTXID);
long txnID = longs.get(scoreDoc.doc - context.docBase);
// Build up the query for the filter of transactions we need to pull the dirty content for.
TermQuery txnIDQuery = new TermQuery(new Term(FIELD_INTXID, fieldType.readableToIndexed(Long.toString(txnID))));
builder.add(new BooleanClause(txnIDQuery, BooleanClause.Occur.SHOULD));
}
BooleanQuery txnFilterQuery = builder.build();
// Get the docs with dirty content for the transactions gathered above.
TermQuery statusQuery1 = new TermQuery(new Term(FIELD_FTSSTATUS, FTSStatus.Dirty.toString()));
TermQuery statusQuery2 = new TermQuery(new Term(FIELD_FTSSTATUS, FTSStatus.New.toString()));
BooleanClause statusClause1 = new BooleanClause(statusQuery1, BooleanClause.Occur.SHOULD);
BooleanClause statusClause2 = new BooleanClause(statusQuery2, BooleanClause.Occur.SHOULD);
BooleanQuery.Builder builder1 = new BooleanQuery.Builder();
builder1.add(statusClause1);
builder1.add(statusClause2);
BooleanQuery statusQuery = builder1.build();
DocListCollector docListCollector = new DocListCollector();
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
builder2.add(statusQuery, BooleanClause.Occur.MUST);
builder2.add(new QueryWrapperFilter(txnFilterQuery), BooleanClause.Occur.MUST);
searcher.search(builder2.build(), docListCollector);
IntArrayList docList = docListCollector.getDocs();
int size = docList.size();
// System.out.println("############### Dirty Doc Count ################:" + size);
Set<String> fields = new HashSet<String>();
fields.add(FIELD_SOLR4_ID);
List<Long> processedTxns = new ArrayList<Long>();
for (int i = 0; i < size; ++i) {
int doc = docList.get(i);
Document document = searcher.doc(doc, fields);
index = ReaderUtil.subIndex(doc, leaves);
context = leaves.get(index);
longs = context.reader().getNumericDocValues(FIELD_INTXID);
long txnId = longs.get(doc - context.docBase);
if (!cleanContentCache.containsKey(txnId)) {
processedTxns.add(txnId);
IndexableField id = document.getField(FIELD_SOLR4_ID);
String idString = id.stringValue();
TenantAclIdDbId tenantAndDbId = AlfrescoSolrDataModel.decodeNodeDocumentId(idString);
docIds.add(tenantAndDbId);
}
}
long txnTime = System.currentTimeMillis();
for (Long l : processedTxns) {
// Record when this transaction was processed so we know when this entry can be purged
cleanContentCache.put(l, txnTime);
}
return docIds;
} finally {
refCounted.decref();
}
}
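The purge logic described in the long comment above follows a simple time-based sweep: remember when each transaction was processed, sweep at most every two minutes, and drop entries older than twenty minutes so failed content fetches become eligible for retry. A minimal sketch of that pattern is shown below; the real cleanContentCache is an LRU map maintained by SolrInformationServer, so the plain ConcurrentHashMap here is a simplification, and the class and method names are illustrative.
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

final class CleanContentCacheSketch {

    private static final long PURGE_INTERVAL_MS = 2 * 60 * 1000L;  // sweep at most every 2 minutes
    private static final long MAX_ENTRY_AGE_MS = 20 * 60 * 1000L;  // drop entries after 20 minutes

    private final Map<Long, Long> cleanContentCache = new ConcurrentHashMap<>();
    private long lastPurged;

    // Record that the content for this transaction has been fetched.
    void markProcessed(long txnId) {
        cleanContentCache.put(txnId, System.currentTimeMillis());
    }

    // True while the transaction should be skipped by the tracker.
    boolean alreadyProcessed(long txnId) {
        return cleanContentCache.containsKey(txnId);
    }

    // Remove stale entries so transactions with failed fetches can be retried later.
    void maybePurge() {
        long now = System.currentTimeMillis();
        if (now - lastPurged <= PURGE_INTERVAL_MS) {
            return;
        }
        Iterator<Map.Entry<Long, Long>> entries = cleanContentCache.entrySet().iterator();
        while (entries.hasNext()) {
            if (now - entries.next().getValue() > MAX_ENTRY_AGE_MS) {
                entries.remove();
            }
        }
        lastPurged = now;
    }
}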
use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.
the class AlfrescoSolrClusteringComponent method getSolrInputDocument.
private SolrInputDocument getSolrInputDocument(Document doc, SolrQueryRequest req) throws IOException {
try {
String id = getFieldValueString(doc, FIELD_SOLR4_ID);
TenantAclIdDbId tenantAndDbId = AlfrescoSolrDataModel.decodeNodeDocumentId(id);
CoreContainer coreContainer = req.getSearcher().getCore().getCoreContainer();
AlfrescoCoreAdminHandler coreAdminHandler = (AlfrescoCoreAdminHandler) coreContainer.getMultiCoreHandler();
SolrInformationServer srv = (SolrInformationServer) coreAdminHandler.getInformationServers().get(req.getSearcher().getCore().getName());
SolrContentStore solrContentStore = srv.getSolrContentStore();
SolrInputDocument sid = solrContentStore.retrieveDocFromSolrContentStore(tenantAndDbId.tenant, tenantAndDbId.dbId);
return sid;
} catch (StringIndexOutOfBoundsException e) {
throw new IOException(e);
}
}
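For reference, the decoded value is consumed the same way throughout these listings: decodeNodeDocumentId turns the stored FIELD_SOLR4_ID string into a TenantAclIdDbId whose public tenant and dbId fields identify the node. The fragment below only restates that usage; idFromIndex is a hypothetical variable holding an id read from the index, and the exact encoding of the id string is internal to AlfrescoSolrDataModel.
// idFromIndex is assumed to hold the stored FIELD_SOLR4_ID value of a node document.
TenantAclIdDbId tenantAndDbId = AlfrescoSolrDataModel.decodeNodeDocumentId(idFromIndex);
String tenant = tenantAndDbId.tenant; // tenant the node belongs to
Long dbId = tenantAndDbId.dbId;       // node's database id, used to look up cached content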
use of org.alfresco.solr.AlfrescoSolrDataModel.TenantAclIdDbId in project SearchServices by Alfresco.
the class ContentTracker method doTrack.
@Override
protected void doTrack() throws Exception {
// System.out.println("############## Content Tracker doTrack()");
try {
long startElapsed = System.nanoTime();
checkShutdown();
final int ROWS = contentReadBatchSize;
int start = 0;
long totalDocs = 0L;
checkShutdown();
while (true) {
try {
getWriteLock().acquire();
List<TenantAclIdDbId> docs = this.infoSrv.getDocsWithUncleanContent(start, ROWS);
// System.out.println("####################### Unclean content: "+docs.size()+" ##############################:"+totalDocs);
if (docs.size() == 0) {
break;
}
int docsUpdatedSinceLastCommit = 0;
for (TenantAclIdDbId doc : docs) {
ContentIndexWorkerRunnable ciwr = new ContentIndexWorkerRunnable(super.threadHandler, doc, infoSrv);
super.threadHandler.scheduleTask(ciwr);
docsUpdatedSinceLastCommit++;
if (docsUpdatedSinceLastCommit >= contentUpdateBatchSize) {
super.waitForAsynchronous();
checkShutdown();
// this.infoSrv.commit();
long endElapsed = System.nanoTime();
trackerStats.addElapsedContentTime(docsUpdatedSinceLastCommit, endElapsed - startElapsed);
startElapsed = endElapsed;
docsUpdatedSinceLastCommit = 0;
}
}
if (docsUpdatedSinceLastCommit > 0) {
super.waitForAsynchronous();
checkShutdown();
// this.infoSrv.commit();
long endElapsed = System.nanoTime();
trackerStats.addElapsedContentTime(docsUpdatedSinceLastCommit, endElapsed - startElapsed);
}
totalDocs += docs.size();
checkShutdown();
} finally {
getWriteLock().release();
}
}
log.info("total number of docs with content updated: " + totalDocs);
} catch (Exception e) {
throw new IOException(e);
}
}
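Stripped of locking, shutdown checks and stats, the loop above reduces to a fetch-and-flush pattern: keep asking the InformationServer for a page of docs with unclean content, schedule a content update for each one, and drain the worker pool every updateBatchSize documents. The sketch below restates that shape only; scheduleContentUpdate and waitForWorkers are hypothetical helpers standing in for the ContentIndexWorkerRunnable dispatch and the waitForAsynchronous call, and the batch-size variable names are illustrative.
while (true) {
    // Ask for the next page of docs whose content still needs to be fetched.
    // The offset stays at 0, matching the doTrack listing above.
    List<TenantAclIdDbId> docs = infoSrv.getDocsWithUncleanContent(0, readBatchSize);
    if (docs.isEmpty()) {
        break; // nothing left with unclean content
    }
    int updatedSinceFlush = 0;
    for (TenantAclIdDbId doc : docs) {
        scheduleContentUpdate(doc); // hypothetical: dispatch a ContentIndexWorkerRunnable
        if (++updatedSinceFlush >= updateBatchSize) {
            waitForWorkers();       // hypothetical: drain the async workers before continuing
            updatedSinceFlush = 0;
        }
    }
    if (updatedSinceFlush > 0) {
        waitForWorkers();
    }
}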