我正在使用Lucene.Net进行搜索,并想知道如何处理这个线程问题.
我只有一个 Test 类的实例,但其中的搜索器并不是线程安全的:定时器线程可能在处理请求的同时更新索引,我也确实因此看到了异常。请问该如何使它线程安全?
// Code from the question. Its behaviour is reproduced as posted: the
// unsynchronised hand-over of the searcher between the timer thread and the
// request threads is intentionally left in place, because it is the problem
// being asked about. Only the layout, the generic type arguments and the
// ESearcher property name were repaired.
public class Test
{
    private static object syncObj = new object();
    private System.Threading.Timer timer;
    private Searcher searcher;
    private RAMDirectory idx = new RAMDirectory();

    // Starts a timer that runs Timer_Elapsed immediately and then every 3 minutes.
    public Test()
    {
        this.timer = new System.Threading.Timer(this.Timer_Elapsed, null, TimeSpan.Zero, TimeSpan.FromMinutes(3));
    }

    // Current searcher. NOTE(review): only the setter takes the lock; the getter
    // reads the field without it, so the lock does not protect readers.
    private Searcher ESearcher
    {
        get { return this.searcher; }
        set { lock (syncObj) { this.searcher = value; } }
    }

    // Builds a document with a stored-only field "A" (title) and a stored,
    // analyzed field "B" (content).
    public Document CreateDocument(string title, string content)
    {
        Document doc = new Document();
        doc.Add(new Field("A", title, Field.Store.YES, Field.Index.NO));
        doc.Add(new Field("B", content, Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }

    // Parses queryString against field "B" and returns at most 10 matching documents.
    // NOTE(review): the query runs on this.ESearcher while the documents are
    // loaded through the 'searcher' parameter; if the timer swaps the searcher
    // in between, the two can refer to different index snapshots.
    public List<Document> Search(Searcher searcher, string queryString)
    {
        List<Document> documents = new List<Document>();
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "B", new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
        Query query = parser.Parse(queryString);
        int hitsPerPage = 5;
        TopScoreDocCollector collector = TopScoreDocCollector.Create(2 * hitsPerPage, true);
        this.ESearcher.Search(query, collector);
        ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
        int hitCount = collector.TotalHits > 10 ? 10 : collector.TotalHits;
        for (int i = 0; i < hitCount; i++)
        {
            ScoreDoc scoreDoc = hits[i];
            int docId = scoreDoc.Doc;
            float docScore = scoreDoc.Score;
            Document doc = searcher.Doc(docId);
            documents.Add(doc);
        }
        return documents;
    }

    // Timer callback: rebuilds the index in the shared RAMDirectory and then
    // publishes a new searcher over it.
    // NOTE(review): the writer is opened with create = true on the same
    // directory the previously published searcher is still reading from; the
    // question reports exceptions when a request is served during this rebuild.
    private void Timer_Elapsed(object sender)
    {
        this.Log("Started Updating the Search Indexing");

        // Get New data to Index
        // 'es' is not defined in the snippet; presumably the freshly loaded
        // key/value data (loading code elided by the author).
        using (IndexWriter writer = new IndexWriter(this.idx, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED))
        {
            foreach (var e in es)
            {
                writer.AddDocument(this.CreateDocument(e.Value.ToString(), e.Key));
            }
            writer.Optimize();
        }

        this.ESearcher = new IndexSearcher(this.idx);
        this.Log("Completed Updating the Search Indexing");
    }

    // Serves one request by searching the current index.
    // 'searchTerm' and 'result' are not defined in the snippet; presumably they
    // come from the request handling code the author elided.
    public Result ServeRequest()
    {
        var documents = this.Search(this.ESearcher, searchTerm);
        //somelogic
        return result;
    }
}
很多事情都是"错误的".
如前所述,这种加锁方式并不安全(读取和写入都需要加锁).
更重要的是,在Lucene中有更好的处理方法.首先,IndexWriter 本身就是线程安全的,它应当是 Directory 的所有者.让代码的不同部分各自打开/关闭目录通常是"不好的做法".
NRT(近实时)索引的做法是从 IndexWriter 获取 IndexReader,而不是用 IndexReader 直接包装 Directory.
只有当索引基本上是只读的,并且只是每天/每周批量重新生成时,您示例中的写法才真正算得上"合适".
我重写了这个例子来演示其中的一些做法.显然,这只是测试代码,具体细节还需要根据实际用例进行重构/完善......
/// <summary>
/// Near-real-time search over an in-memory Lucene.Net index. A single
/// <c>IndexWriter</c> owns the directory; readers are obtained from the writer
/// and a timer periodically publishes a fresh searcher when the index changed.
/// </summary>
public class Test
{
    private readonly System.Threading.Timer timer;
    private readonly IndexWriter writer;
    private IndexReader reader;
    private Searcher searcher;

    /// <summary>
    /// Creates the writer over a new RAMDirectory, opens the first reader and
    /// searcher from it, and starts the refresh timer (immediately, then every 3 minutes).
    /// </summary>
    public Test()
    {
        writer = new IndexWriter(new RAMDirectory(), new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED);
        reader = writer.GetReader();
        searcher = new IndexSearcher(reader);
        // Started last so the callback never observes a half-initialised instance.
        timer = new System.Threading.Timer(Timer_Elapsed, null, TimeSpan.Zero, TimeSpan.FromMinutes(3));
    }

    /// <summary>
    /// Builds a document (stored-only field "A", stored and analyzed field "B")
    /// and adds it directly to the writer.
    /// </summary>
    /// <param name="title">Value stored in field "A".</param>
    /// <param name="content">Value stored and indexed in field "B".</param>
    public void CreateDocument(string title, string content)
    {
        var doc = new Document();
        doc.Add(new Field("A", title, Field.Store.YES, Field.Index.NO));
        doc.Add(new Field("B", content, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }

    /// <summary>
    /// Deletes every document and re-adds one document per entry, using the
    /// entry's value as title and its key as content.
    /// </summary>
    /// <param name="es">
    /// Entries to index. NOTE(review): the dictionary's type arguments were lost
    /// from the snippet; the key is used as a string and the value only via
    /// ToString(), so the value type is left generic - confirm against callers.
    /// </param>
    public void ReplaceAll<TValue>(Dictionary<string, TValue> es)
    {
        // pause timer
        timer.Change(Timeout.Infinite, Timeout.Infinite);
        try
        {
            writer.DeleteAll();
            foreach (var e in es)
            {
                // The original called an undefined AddDocument; CreateDocument is
                // the method of this class that builds and adds the document.
                CreateDocument(e.Value.ToString(), e.Key);
            }
        }
        finally
        {
            // restart timer even if indexing threw, otherwise the searcher
            // would never be refreshed again
            timer.Change(TimeSpan.Zero, TimeSpan.FromMinutes(3));
        }
    }

    /// <summary>
    /// Parses <paramref name="queryString"/> against field "B" and returns the
    /// top matching documents (at most 10).
    /// </summary>
    /// <param name="queryString">Query in Lucene query-parser syntax.</param>
    /// <returns>The stored documents of the top hits; empty when nothing matches.</returns>
    public List<Document> Search(string queryString)
    {
        // Read the field once: the timer may swap it at any time, and doc ids
        // are only meaningful for the searcher that produced them.
        Searcher current = searcher;

        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "B", new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30));
        Query query = parser.Parse(queryString);

        int hitsPerPage = 5;
        var collector = TopScoreDocCollector.Create(2 * hitsPerPage, true);
        current.Search(query, collector);

        // The collector was created for 2 * hitsPerPage entries, so ScoreDocs
        // already holds min(10, total hits) items.
        ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
        var documents = new List<Document>(hits.Length);
        foreach (ScoreDoc hit in hits)
        {
            documents.Add(current.Doc(hit.Doc));
        }
        return documents;
    }

    /// <summary>
    /// Timer callback: when the current reader is stale, obtains a new reader
    /// from the writer and atomically publishes a searcher over it.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the previous reader is not closed here because in-flight
    /// searches may still be using it; confirm whether that is acceptable for
    /// the directory type in use.
    /// </remarks>
    private void Timer_Elapsed(object sender)
    {
        if (reader.IsCurrent())
        {
            return;
        }

        reader = writer.GetReader();
        var newSearcher = new IndexSearcher(reader);
        Interlocked.Exchange(ref searcher, newSearcher);
        Debug.WriteLine("Searcher updated");
    }

    /// <summary>Serves one request by searching for <paramref name="searchTerm"/>.</summary>
    public Result ServeRequest(string searchTerm)
    {
        var documents = Search(searchTerm);
        //somelogic
        var result = new Result();
        return result;
    }
}
注意:
写入器(writer)"拥有"该目录
如果这是一个基于文件的目录,那么您需要提供 Open 和 Close 方法来创建/释放写入器(它负责处理 lock 文件).RAMDirectory 则可以直接交给 GC 回收
使用 Interlocked.Exchange 而不是 lock,因此读取 searcher 成员时是零成本的(此处有风险,务必小心!)
新文档直接添加到写入器(writer)
IsCurrent() 使得在没有添加新文档时刷新是零成本的.根据您添加文档的频率,您可能根本不需要计时器(只需在 Search 的开头调用 Timer_Elapsed 即可,当然要先给它改个名字).
不要使用 Optimize(),它是旧版本的遗留物,强烈不建议使用(出于性能和磁盘I/O方面的原因)
最后,如果您使用的是Lucene.net v4.8,那么您应该使用 SearcherManager(如另一个答案所示).但请使用接受 IndexWriter 的构造函数,并将它保持为"单例"(与 writer 的作用域相同).它会为您处理锁定以及获取新的读取器.