Lucene++ - a full-featured, C++ search engine
API Documentation


TermVectorsReader.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef TERMVECTORSREADER_H
8 #define TERMVECTORSREADER_H
9 
10 #include "TermVectorMapper.h"
11 
12 namespace Lucene {
13 
/// Declares the term-vector reader interface: per-document and per-field get()
/// overloads (returning a vector or feeding a TermVectorMapper), rawDocs(),
/// size() and close().
/// NOTE(review): this listing skips several header lines; the page's
/// cross-reference section places members such as fieldInfos, tvx, tvd and tvf
/// at header lines 44-48 -- consult the real header for the full declaration.
14 class LPPAPI TermVectorsReader : public LuceneObject {
15 public:
    /// Three-argument constructor: directory, segment name and field infos.
17  TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos);
    /// Extended constructor; docStoreOffset defaults to -1 and size to 0.
18  TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos,
19  int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0);
20  virtual ~TermVectorsReader();
21 
23 
24 public:
    /// NOTE: if you make a new format, it must be larger than the current format.
26  static const int32_t FORMAT_VERSION;
27 
    /// Changes to speed up bulk merging of term vectors.
29  static const int32_t FORMAT_VERSION2;
30 
    /// Changed strings to UTF8 with length-in-bytes not length-in-chars.
32  static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES;
33 
    /// NOTE: always change this if you switch to a new format.
35  static const int32_t FORMAT_CURRENT;
36 
    /// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file.
38  static const int32_t FORMAT_SIZE;
39 
40  static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR;
41  static const uint8_t STORE_OFFSET_WITH_TERMVECTOR;
42 
43 protected:
45 
49  int32_t _size;
50  int32_t numTotalDocs;
51 
    /// The docID offset where our docs begin in the index file.
53  int32_t docStoreOffset;
54 
55  int32_t format;
56 
57 public:
60 
63 
65 
    /// Retrieve the length (in bytes) of the tvd and tvf entries for the next
    /// numDocs documents, starting with startDocID.
    /// NOTE(review): presumably the lengths are written into tvdLengths and
    /// tvfLengths -- confirm against the implementation.
69  void rawDocs(Collection<int32_t> tvdLengths, Collection<int32_t> tvfLengths, int32_t startDocID, int32_t numDocs);
70 
71  void close();
72 
74  int32_t size();
75 
    /// Mapper-based overload of get() for a single document and field.
76  void get(int32_t docNum, const String& field, const TermVectorMapperPtr& mapper);
77 
    /// Retrieve the term vector for the given document and field.
83  TermFreqVectorPtr get(int32_t docNum, const String& field);
84 
90 
    /// Mapper-based overload of get() for a whole document.
91  void get(int32_t docNumber, const TermVectorMapperPtr& mapper);
92 
94 
95 protected:
    /// Takes the same six parameters as the extended constructor.
    /// NOTE(review): presumably the shared construction path for both
    /// constructors -- confirm in the .cpp.
96  void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size);
97 
98  void seekTvx(int32_t docNum);
99 
100  int32_t checkValidFormat(const IndexInputPtr& in);
101 
    /// Reads the String[] fields; you have to pre-seek tvd to the right point.
103  Collection<String> readFields(int32_t fieldCount);
104 
107 
110 
114  void readTermVector(const String& field, int64_t tvfPointer, const TermVectorMapperPtr& mapper);
115 };
116 
119 public:
122 
124 
125 protected:
133  String field;
134 
135 public:
138  virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions);
139 
141  virtual void map(const String& term, int32_t frequency, Collection<TermVectorOffsetInfoPtr> offsets, Collection<int32_t> positions);
142 
146 };
147 
148 }
149 
150 #endif
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Models the existing parallel array structure.
Definition: TermVectorsReader.h:118
int32_t currentPosition
Definition: TermVectorsReader.h:130
Collection< Collection< int32_t > > positions
Definition: TermVectorsReader.h:128
bool storingPositions
Definition: TermVectorsReader.h:132
String field
Definition: TermVectorsReader.h:133
TermFreqVectorPtr materializeVector()
Construct the vector.
Collection< int32_t > termFreqs
Definition: TermVectorsReader.h:127
Collection< String > terms
Definition: TermVectorsReader.h:123
virtual void map(const String &term, int32_t frequency, Collection< TermVectorOffsetInfoPtr > offsets, Collection< int32_t > positions)
Map the Term Vector information into your own structure.
Collection< Collection< TermVectorOffsetInfoPtr > > offsets
Definition: TermVectorsReader.h:129
bool storingOffsets
Definition: TermVectorsReader.h:131
virtual void setExpectations(const String &field, int32_t numTerms, bool storeOffsets, bool storePositions)
Tell the mapper what to expect in regards to field, number of terms, offset and position storage....
The TermVectorMapper can be used to map Term Vectors into your own structure instead of the parallel ...
Definition: TermVectorMapper.h:18
Definition: TermVectorsReader.h:14
int32_t checkValidFormat(const IndexInputPtr &in)
Collection< TermFreqVectorPtr > get(int32_t docNum)
Return all term vectors stored for this document or null if they could not be read in.
FieldInfosPtr fieldInfos
Definition: TermVectorsReader.h:44
IndexInputPtr getTvfStream()
Used for bulk copy when merging.
static const int32_t FORMAT_VERSION
NOTE: if you make a new format, it must be larger than the current format.
Definition: TermVectorsReader.h:22
int32_t format
Definition: TermVectorsReader.h:55
void readTermVector(const String &field, int64_t tvfPointer, const TermVectorMapperPtr &mapper)
void readTermVectors(Collection< String > fields, Collection< int64_t > tvfPointers, const TermVectorMapperPtr &mapper)
static const int32_t FORMAT_CURRENT
NOTE: always change this if you switch to a new format.
Definition: TermVectorsReader.h:35
TermFreqVectorPtr get(int32_t docNum, const String &field)
Retrieve the term vector for the given document and field.
Collection< int64_t > readTvfPointers(int32_t fieldCount)
Reads the long[] offsets into TVF; you have to pre-seek tvx/tvd to the right point.
Collection< TermFreqVectorPtr > readTermVectors(int32_t docNum, Collection< String > fields, Collection< int64_t > tvfPointers)
static const uint8_t STORE_OFFSET_WITH_TERMVECTOR
Definition: TermVectorsReader.h:41
static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR
Definition: TermVectorsReader.h:40
static const int32_t FORMAT_VERSION2
Changes to speed up bulk merging of term vectors.
Definition: TermVectorsReader.h:29
IndexInputPtr tvd
Definition: TermVectorsReader.h:47
void get(int32_t docNumber, const TermVectorMapperPtr &mapper)
void rawDocs(Collection< int32_t > tvdLengths, Collection< int32_t > tvfLengths, int32_t startDocID, int32_t numDocs)
Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs starting with startDoc...
IndexInputPtr getTvdStream()
Used for bulk copy when merging.
void seekTvx(int32_t docNum)
TermVectorsReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos, int32_t readBufferSize, int32_t docStoreOffset=-1, int32_t size=0)
static const int32_t FORMAT_SIZE
The size in bytes that the FORMAT_VERSION will take up at the beginning of each file.
Definition: TermVectorsReader.h:38
TermVectorsReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos)
Collection< String > readFields(int32_t fieldCount)
Reads the String[] fields; you have to pre-seek tvd to the right point.
IndexInputPtr tvx
Definition: TermVectorsReader.h:46
int32_t numTotalDocs
Definition: TermVectorsReader.h:50
int32_t docStoreOffset
The docID offset where our docs begin in the index file. This will be 0 if we have our own private fi...
Definition: TermVectorsReader.h:53
int32_t _size
Definition: TermVectorsReader.h:49
void ConstructReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size)
void get(int32_t docNum, const String &field, const TermVectorMapperPtr &mapper)
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Return clone of this object.
IndexInputPtr tvf
Definition: TermVectorsReader.h:48
static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES
Changed strings to UTF8 with length-in-bytes not length-in-chars.
Definition: TermVectorsReader.h:32
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition: LuceneTypes.h:539
boost::shared_ptr< FieldInfos > FieldInfosPtr
Definition: LuceneTypes.h:127
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition: LuceneTypes.h:254
boost::shared_ptr< IndexInput > IndexInputPtr
Definition: LuceneTypes.h:493
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition: LuceneTypes.h:237

clucene.sourceforge.net