Lucene++ - a full-featured, c++ search engine
API Documentation


IndexReader.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef INDEXREADER_H
8 #define INDEXREADER_H
9 
10 #include "SegmentInfos.h"
11 
12 namespace Lucene {
13 
/// IndexReader is an abstract class, providing an interface for accessing an index.
/// NOTE(review): this is a line-numbered rendering of the header; gaps in the numbering mark
/// lines (doc comments and some declarations) that the rendering does not show.
39 class LPPAPI IndexReader : public LuceneObject {
40 public:
42  virtual ~IndexReader();
43 
45 
46 public:
    /// Constants describing field properties, for example used for getFieldNames(FieldOption).
    /// NOTE(review): only the final enumerator is visible in this listing; the remaining
    /// FIELD_OPTION_* values are documented as being defined on lines 50-68 of the header.
48  enum FieldOption {
70  FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET
71  };
72 
73  static const int32_t DEFAULT_TERMS_INDEX_DIVISOR;
74 
75 protected:
76  bool closed;
78  int32_t refCount;
79 
80 public:
    /// Returns the current refCount for this reader.
82  int32_t getRefCount();
83 
    /// Increments the refCount of this IndexReader instance.
90  void incRef();
91 
    /// Decreases the refCount of this IndexReader instance.
95  void decRef();
96 
    /// Returns an IndexReader reading the index in the given Directory, with readOnly = true.
99  static IndexReaderPtr open(const DirectoryPtr& directory);
100 
    /// Returns an IndexReader reading the index in the given Directory.
106  static IndexReaderPtr open(const DirectoryPtr& directory, bool readOnly);
107 
    /// Returns an IndexReader reading the index in the given IndexCommit.
113  static IndexReaderPtr open(const IndexCommitPtr& commit, bool readOnly);
114 
    /// Returns an IndexReader reading the index in the given Directory, with a custom
    /// IndexDeletionPolicy.
122  static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly);
123 
    /// Returns an IndexReader reading the index in the given Directory, with a custom
    /// IndexDeletionPolicy.
137  static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor);
138 
    /// Returns an IndexReader reading the index in the given Directory, using a specific commit.
147  static IndexReaderPtr open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly);
148 
    /// Returns an IndexReader reading the index in the given Directory, using a specific commit.
162  static IndexReaderPtr open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor);
163 
198 
    /// Just like reopen(), except you can change the readOnly of the original reader.
201  virtual IndexReaderPtr reopen(bool openReadOnly);
202 
    /// Reopen this reader on a specific commit point. This always returns a readOnly reader.
206  virtual IndexReaderPtr reopen(const IndexCommitPtr& commit);
207 
217 
    /// Clones the IndexReader and optionally changes readOnly.
219  virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr());
220 
225 
    /// Returns the time the index in the named directory was last modified.
228  static int64_t lastModified(const DirectoryPtr& directory2);
229 
    /// Reads version number from segments files.
234  static int64_t getCurrentVersion(const DirectoryPtr& directory);
235 
    /// Reads commitUserData, previously passed to IndexWriter::commit(MapStringString).
239  static MapStringString getCommitUserData(const DirectoryPtr& directory);
240 
    /// Version number when this IndexReader was opened. Not implemented in the IndexReader
    /// base class.
254  virtual int64_t getVersion();
255 
    /// Retrieve the String userData optionally passed to IndexWriter::commit.
258  virtual MapStringString getCommitUserData();
259 
    /// Check whether any new changes have occurred to the index since this reader was opened.
274  virtual bool isCurrent();
275 
    /// Checks if the index is optimized (if it has a single segment and no deletions).
279  virtual bool isOptimized();
280 
    /// Return an array of term frequency vectors for the specified document.
290  virtual Collection<TermFreqVectorPtr> getTermFreqVectors(int32_t docNumber) = 0;
291 
    /// Return a term frequency vector for the specified document and field.
301  virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field) = 0;
302 
    /// Load the Term Vector into a user-defined data structure (the mapper).
308  virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) = 0;
309 
    /// Map all the term vectors for all fields in a Document.
313  virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) = 0;
314 
    /// Returns true if an index exists at the specified directory.
319  static bool indexExists(const DirectoryPtr& directory);
320 
    /// Returns the number of documents in this index.
322  virtual int32_t numDocs() = 0;
323 
    /// Returns one greater than the largest possible document number.
326  virtual int32_t maxDoc() = 0;
327 
    /// Returns the number of deleted documents.
329  int32_t numDeletedDocs();
330 
    /// Returns the stored fields of the n'th Document in this index.
337  virtual DocumentPtr document(int32_t n);
338 
    /// Get the Document at the n'th position. The FieldSelector may be used to determine what
    /// Fields to load.
359  virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector) = 0;
360 
    /// Returns true if document n has been deleted.
362  virtual bool isDeleted(int32_t n) = 0;
363 
    /// Returns true if any documents have been deleted.
365  virtual bool hasDeletions() = 0;
366 
    /// Used for testing.
368  virtual bool hasChanges();
369 
    /// Returns true if there are norms stored for this field.
371  virtual bool hasNorms(const String& field);
372 
    /// Returns the byte-encoded normalization factor for the named field of every document.
376  virtual ByteArray norms(const String& field) = 0;
377 
    /// Reads the byte-encoded normalization factor for the named field of every document.
381  virtual void norms(const String& field, ByteArray norms, int32_t offset) = 0;
382 
    /// Resets the normalization factor for the named field of the named document.
392  virtual void setNorm(int32_t doc, const String& field, uint8_t value);
393 
    /// Resets the normalization factor for the named field of the named document.
398  virtual void setNorm(int32_t doc, const String& field, double value);
399 
    /// Returns an enumeration of all the terms in the index.
404  virtual TermEnumPtr terms() = 0;
405 
    /// Returns an enumeration of all terms starting at a given term.
410  virtual TermEnumPtr terms(const TermPtr& t) = 0;
411 
    /// Returns the number of documents containing the term t.
413  virtual int32_t docFreq(const TermPtr& t) = 0;
414 
    /// Returns an enumeration of all the documents which contain term.
420  virtual TermDocsPtr termDocs(const TermPtr& term);
421 
    /// Returns an unpositioned TermDocs enumerator.
423  virtual TermDocsPtr termDocs() = 0;
424 
    /// Returns an enumeration of all the documents which contain term.
432  virtual TermPositionsPtr termPositions(const TermPtr& term);
433 
436 
    /// Deletes the document numbered docNum.
442  virtual void deleteDocument(int32_t docNum);
443 
    /// Deletes all documents that have a given term indexed.
450  virtual int32_t deleteDocuments(const TermPtr& term);
451 
    /// Undeletes all documents currently marked as deleted in this index.
453  virtual void undeleteAll();
454 
455  void flush();
456 
459  void flush(MapStringString commitUserData);
460 
    /// Commit changes resulting from delete, undeleteAll, or setNorm operations.
464  void commit(MapStringString commitUserData);
465 
    /// Closes files associated with this index. Also saves any new deletions to disk.
468  void close();
469 
    /// Get a list of unique field names that exist in this index and match the specified
    /// field option.
473  virtual HashSet<String> getFieldNames(FieldOption fieldOption) = 0;
474 
478 
    /// Prints the filename and size of each file within a given compound file.
484  static void main(Collection<String> args);
485 
494 
505 
507 
510 
    /// Returns the number of unique terms (across all fields) in this reader.
515  virtual int64_t getUniqueTermCount();
516 
    /// For IndexReader implementations that use TermInfosReader to read terms.
    /// NOTE(review): presumably returns the current terms-index divisor; the rendered
    /// description is truncated, so confirm against the full header.
519  virtual int32_t getTermInfosIndexDivisor();
520 
521 protected:
522  void ensureOpen();
523 
524  static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor);
525 
    /// Implements setNorm in subclass.
527  virtual void doSetNorm(int32_t doc, const String& field, uint8_t value) = 0;
528 
    /// Implements deletion of the document numbered docNum.
531  virtual void doDelete(int32_t docNum) = 0;
532 
    /// Implements actual undeleteAll() in subclass.
534  virtual void doUndeleteAll() = 0;
535 
    /// Does nothing by default. Subclasses that require a write lock for index modifications
    /// must implement this method.
538  virtual void acquireWriteLock();
539 
    /// Commit changes resulting from delete, undeleteAll, or setNorm operations.
543  void commit();
544 
    /// Implements commit.
546  virtual void doCommit(MapStringString commitUserData) = 0;
547 
    /// Implements close.
549  virtual void doClose() = 0;
550 
551  friend class DirectoryReader;
552  friend class ParallelReader;
553 };
554 
555 }
556 
557 #endif
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Utility template class to handle collections that can be safely copied and shared.
Definition: Collection.h:17
An IndexReader which reads indexes with multiple segments.
Definition: DirectoryReader.h:19
IndexReader is an abstract class, providing an interface for accessing an index. Search of an index i...
Definition: IndexReader.h:39
void close()
Closes files associated with this index. Also saves any new deletions to disk. No other methods shoul...
virtual TermDocsPtr termDocs(const TermPtr &term)
Returns an enumeration of all the documents which contain term. For each document,...
int32_t getRefCount()
Returns the current refCount for this reader.
virtual int32_t docFreq(const TermPtr &t)=0
Returns the number of documents containing the term t.
virtual IndexCommitPtr getIndexCommit()
Return the IndexCommit that this reader has opened. This method is only implemented by those readers ...
virtual IndexReaderPtr reopen()
Refreshes an IndexReader if the index has changed since this instance was (re)opened.
virtual void undeleteAll()
Undeletes all documents currently marked as deleted in this index.
static IndexReaderPtr open(const DirectoryPtr &directory, const IndexDeletionPolicyPtr &deletionPolicy, const IndexCommitPtr &commit, bool readOnly, int32_t termInfosIndexDivisor)
void incRef()
Increments the refCount of this IndexReader instance. RefCounts are used to determine when a reader c...
virtual DocumentPtr document(int32_t n, const FieldSelectorPtr &fieldSelector)=0
Get the Document at the n'th position. The FieldSelector may be used to determine what Fields to load...
void decRef()
Decreases the refCount of this IndexReader instance. If the refCount drops to 0, then pending changes...
virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr &mapper)=0
Map all the term vectors for all fields in a Document.
virtual bool isOptimized()
Checks if the index is optimized (if it has a single segment and no deletions). Not implemented in th...
virtual void doDelete(int32_t docNum)=0
Implements deletion of the document numbered docNum. Applications should call deleteDocument(int) or ...
virtual LuceneObjectPtr getFieldCacheKey()
virtual int64_t getVersion()
Version number when this IndexReader was opened. Not implemented in the IndexReader base class.
virtual bool hasChanges()
Used for testing.
virtual void setNorm(int32_t doc, const String &field, uint8_t value)
Resets the normalization factor for the named field of the named document. The norm represents the pr...
virtual bool hasDeletions()=0
Returns true if any documents have been deleted.
virtual void doSetNorm(int32_t doc, const String &field, uint8_t value)=0
Implements setNorm in subclass.
virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String &field)=0
Return a term frequency vector for the specified document and field. The returned vector contains ter...
bool closed
Definition: IndexReader.h:76
virtual Collection< IndexReaderPtr > getSequentialSubReaders()
Returns the sequential sub readers that this reader is logically composed of. For example,...
static bool indexExists(const DirectoryPtr &directory)
Returns true if an index exists at the specified directory. If the directory does not exist or if the...
virtual void acquireWriteLock()
Does nothing by default. Subclasses that require a write lock for index modifications must implement ...
virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr &other=LuceneObjectPtr())
Clones the IndexReader and optionally changes readOnly. A readOnly reader cannot open a writable read...
virtual ByteArray norms(const String &field)=0
Returns the byte-encoded normalization factor for the named field of every document....
virtual bool hasNorms(const String &field)
Returns true if there are norms stored for this field.
void commit(MapStringString commitUserData)
Commit changes resulting from delete, undeleteAll, or setNorm operations. If an exception is hit,...
virtual int32_t numDocs()=0
Returns the number of documents in this index.
virtual void doUndeleteAll()=0
Implements actual undeleteAll() in subclass.
virtual TermDocsPtr termDocs()=0
Returns an unpositioned TermDocs enumerator.
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Efficiently clones the IndexReader (sharing most internal state).
virtual void getTermFreqVector(int32_t docNumber, const String &field, const TermVectorMapperPtr &mapper)=0
Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of ...
bool _hasChanges
Definition: IndexReader.h:77
void flush(MapStringString commitUserData)
virtual bool isDeleted(int32_t n)=0
Returns true if document n has been deleted.
virtual DocumentPtr document(int32_t n)
Returns the stored fields of the n'th Document in this index.
static IndexReaderPtr open(const IndexCommitPtr &commit, bool readOnly)
Returns an IndexReader reading the index in the given IndexCommit. You should pass readOnly = true,...
virtual int32_t deleteDocuments(const TermPtr &term)
Deletes all documents that have a given term indexed. This is useful if one uses a document field to ...
static IndexReaderPtr open(const DirectoryPtr &directory, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor)
Returns an IndexReader reading the index in the given Directory, with a custom IndexDeletionPolicy....
virtual void doClose()=0
Implements close.
virtual void deleteDocument(int32_t docNum)
Deletes the document numbered docNum. Once a document is deleted it will not appear in TermDocs or Te...
void commit()
Commit changes resulting from delete, undeleteAll, or setNorm operations. If an exception is hit,...
virtual void doCommit(MapStringString commitUserData)=0
Implements commit.
static IndexReaderPtr open(const IndexCommitPtr &commit, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor)
Returns an IndexReader reading the index in the given Directory, using a specific commit and with a c...
virtual int64_t getUniqueTermCount()
Returns the number of unique terms (across all fields) in this reader.
static IndexReaderPtr open(const IndexCommitPtr &commit, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly)
Returns an IndexReader reading the index in the given Directory, using a specific commit and with a c...
int32_t refCount
Definition: IndexReader.h:78
virtual LuceneObjectPtr getDeletesCacheKey()
This returns null if the reader has no deletions.
virtual TermEnumPtr terms(const TermPtr &t)=0
Returns an enumeration of all terms starting at a given term. If the given term does not exist,...
static MapStringString getCommitUserData(const DirectoryPtr &directory)
Reads commitUserData, previously passed to IndexWriter#commit(MapStringString), from current index se...
virtual IndexReaderPtr reopen(bool openReadOnly)
Just like reopen(), except you can change the readOnly of the original reader. If the index is unchan...
static int64_t getCurrentVersion(const DirectoryPtr &directory)
Reads version number from segments files. The version number is initialized with a timestamp and then...
static IndexReaderPtr open(const DirectoryPtr &directory, bool readOnly)
Returns an IndexReader reading the index in the given Directory. You should pass readOnly = true,...
static IndexReaderPtr open(const DirectoryPtr &directory)
Returns an IndexReader reading the index in the given Directory, with readOnly = true.
virtual TermPositionsPtr termPositions()=0
Returns an unpositioned TermPositions enumerator.
virtual Collection< TermFreqVectorPtr > getTermFreqVectors(int32_t docNumber)=0
Return an array of term frequency vectors for the specified document. The array contains a vector for...
static Collection< IndexCommitPtr > listCommits(const DirectoryPtr &dir)
Returns all commit points that exist in the Directory. Normally, because the default is KeepOnlyLastCommitDeletionPolicy,...
virtual int32_t getTermInfosIndexDivisor()
For IndexReader implementations that use TermInfosReader to read terms, this returns the current inde...
virtual int32_t maxDoc()=0
Returns one greater than the largest possible document number. This may be used to,...
static IndexReaderPtr open(const DirectoryPtr &directory, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly)
Returns an IndexReader reading the index in the given Directory, with a custom IndexDeletionPolicy....
FieldOption
Constants describing field properties, for example used for IndexReader#getFieldNames(FieldOption).
Definition: IndexReader.h:48
@ FIELD_OPTION_STORES_PAYLOADS
All fields that store payloads.
Definition: IndexReader.h:54
@ FIELD_OPTION_TERMVECTOR_WITH_OFFSET
All fields with termvectors with offset values enabled.
Definition: IndexReader.h:68
@ FIELD_OPTION_ALL
All fields.
Definition: IndexReader.h:50
@ FIELD_OPTION_INDEXED
All indexed fields.
Definition: IndexReader.h:52
@ FIELD_OPTION_INDEXED_NO_TERMVECTOR
All fields which are indexed but don't have termvectors enabled.
Definition: IndexReader.h:62
@ FIELD_OPTION_UNINDEXED
All fields which are not indexed.
Definition: IndexReader.h:58
@ FIELD_OPTION_TERMVECTOR
All fields with termvectors enabled. Please note that only standard termvector fields are returned.
Definition: IndexReader.h:64
@ FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS
All fields that omit tf.
Definition: IndexReader.h:56
@ FIELD_OPTION_INDEXED_WITH_TERMVECTOR
All fields which are indexed with termvectors enabled.
Definition: IndexReader.h:60
@ FIELD_OPTION_TERMVECTOR_WITH_POSITION
All fields with termvectors with position values enabled.
Definition: IndexReader.h:66
virtual MapStringString getCommitUserData()
Retrieve the String userData optionally passed to IndexWriter::commit. This will return null if Index...
int32_t numDeletedDocs()
Returns the number of deleted documents.
static int64_t lastModified(const DirectoryPtr &directory2)
Returns the time the index in the named directory was last modified. Do not use this to check whether...
virtual HashSet< String > getFieldNames(FieldOption fieldOption)=0
Get a list of unique field names that exist in this index and have the specified field option informa...
virtual ~IndexReader()
static void main(Collection< String > args)
Prints the filename and size of each file within a given compound file. Add the -extract flag to extr...
virtual TermEnumPtr terms()=0
Returns an enumeration of all the terms in the index. The enumeration is ordered by Term::compareTo()...
virtual void setNorm(int32_t doc, const String &field, double value)
Resets the normalization factor for the named field of the named document.
virtual void norms(const String &field, ByteArray norms, int32_t offset)=0
Reads the byte-encoded normalization factor for the named field of every document....
static const int32_t DEFAULT_TERMS_INDEX_DIVISOR
Definition: IndexReader.h:73
virtual IndexReaderPtr reopen(const IndexCommitPtr &commit)
Reopen this reader on a specific commit point. This always returns a readOnly reader....
virtual DirectoryPtr directory()
Returns the directory associated with this index. The default implementation returns the directory sp...
virtual TermPositionsPtr termPositions(const TermPtr &term)
Returns an enumeration of all the documents which contain term. For each document,...
virtual bool isCurrent()
Check whether any new changes have occurred to the index since this reader was opened.
Base class for all Lucene classes.
Definition: LuceneObject.h:31
An IndexReader which reads multiple, parallel indexes. Each index added must have the same number of ...
Definition: ParallelReader.h:26
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< IndexCommit > IndexCommitPtr
Definition: LuceneTypes.h:152
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition: LuceneTypes.h:539
boost::shared_ptr< TermPositions > TermPositionsPtr
Definition: LuceneTypes.h:243
boost::shared_ptr< TermDocs > TermDocsPtr
Definition: LuceneTypes.h:236
boost::shared_ptr< FieldSelector > FieldSelectorPtr
Definition: LuceneTypes.h:77
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition: LuceneTypes.h:254
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
boost::shared_ptr< IndexDeletionPolicy > IndexDeletionPolicyPtr
Definition: LuceneTypes.h:153
boost::shared_ptr< TermEnum > TermEnumPtr
Definition: LuceneTypes.h:235
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition: LuceneTypes.h:237
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74

clucene.sourceforge.net