本文整理了Java中org.apache.lucene.util.BytesRefHash
类的一些代码示例,展示了BytesRefHash
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。BytesRefHash
类的具体详情如下:
包路径:org.apache.lucene.util.BytesRefHash
类名称:BytesRefHash
[英]BytesRefHash is a special purpose hash-map like data-structure optimized for BytesRef instances. BytesRefHash maintains mappings of byte arrays to ids (Map<BytesRef,int>) storing the hashed bytes efficiently in continuous storage. The mapping to the id is encapsulated inside BytesRefHash and is guaranteed to be increased for each added BytesRef.
Note: A BytesRef instance passed to #add(BytesRef) must not be longer than ByteBlockPool#BYTE_BLOCK_SIZE-2 bytes. The internal storage is limited to 2GB of total byte storage.
[中]BytesRefHash是一种专门针对BytesRef实例优化的、类似哈希映射的数据结构。BytesRefHash维护字节数组到id的映射(Map<BytesRef,int>),并将被散列的字节高效地保存在连续存储中。到id的映射封装在BytesRefHash内部,并保证id随每个新添加的BytesRef递增。
注意:传递给#add(BytesRef)的BytesRef实例的长度不得超过ByteBlockPool#BYTE_BLOCK_SIZE-2个字节。内部存储的总字节数上限为2GB。
代码示例来源:origin: org.apache.lucene/lucene-core
/**
 * Rewrites {@code query} by collecting its terms from {@code reader} and adding one clause
 * per collected term to a new top-level builder.
 *
 * @param reader the reader the terms are collected from
 * @param query the multi-term query being rewritten
 * @return the query produced by {@code build} from the populated builder
 * @throws IOException if collecting terms or reading document frequencies fails
 */
@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
  final B topLevel = getTopLevelBuilder();
  final ParallelArraysTermCollector collector = new ParallelArraysTermCollector();
  collectTerms(reader, query, collector);

  final int termCount = collector.terms.size();
  if (termCount == 0) {
    // Nothing was collected, so the builder is built without any clause.
    return build(topLevel);
  }

  // Term ids in the order produced by sort(); each id indexes the collector's parallel arrays.
  final int[] sortedIds = collector.terms.sort();
  final float[] boosts = collector.array.boost;
  final TermContext[] states = collector.array.termState;
  for (int rank = 0; rank < termCount; rank++) {
    final int id = sortedIds[rank];
    // A fresh BytesRef per term: the Term keeps the instance it is given.
    final BytesRef termBytes = collector.terms.get(id, new BytesRef());
    final Term term = new Term(query.getField(), termBytes);
    assert reader.docFreq(term) == states[id].docFreq();
    addClause(topLevel, term, states[id].docFreq(), boosts[id], states[id]);
  }
  return build(topLevel);
}
代码示例来源:origin: org.apache.lucene/lucene-core
/**
 * Records a single value: interns it in {@code hash}, appends the resulting term id to
 * {@code pending}, and refreshes the memory accounting.
 *
 * @param value the bytes to record
 */
private void addOneValue(BytesRef value) {
  final int added = hash.add(value);
  final int termID;
  if (added >= 0) {
    // reserve additional space for each unique value:
    // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
    //    TODO: can this same OOM happen in THPF?
    // 2. when flushing, we need 1 int per value (slot in the ordMap).
    iwBytesUsed.addAndGet(2 * Integer.BYTES);
    termID = added;
  } else {
    // Negative result: decode it back into a non-negative term id.
    termID = -added - 1;
  }
  pending.add(termID);
  updateBytesUsed();
}
代码示例来源:origin: org.apache.lucene/lucene-core
/**
 * Creates the per-field terms hash, borrowing its pools, doc state and byte counter from
 * {@code termsHash}.
 *
 * <p>streamCount: how many streams this field stores per term.
 * E.g. doc(+freq) is 1 stream, prox+offset is a second.
 */
public TermsHashPerField(int streamCount, FieldInvertState fieldState, TermsHash termsHash, TermsHashPerField nextPerField, FieldInfo fieldInfo) {
  // Per-field settings handed in by the caller.
  this.streamCount = streamCount;
  this.numPostingInt = 2 * streamCount;
  this.fieldState = fieldState;
  this.fieldInfo = fieldInfo;
  this.nextPerField = nextPerField;

  // Resources shared through the owning TermsHash.
  this.termsHash = termsHash;
  this.intPool = termsHash.intPool;
  this.bytePool = termsHash.bytePool;
  this.termBytePool = termsHash.termBytePool;
  this.docState = termsHash.docState;
  this.bytesUsed = termsHash.bytesUsed;

  // The hash stores term bytes in termBytePool and uses a start array bound to this instance.
  final PostingsBytesStartArray startArray = new PostingsBytesStartArray(this, this.bytesUsed);
  this.bytesHash = new BytesRefHash(this.termBytePool, HASH_INIT_SIZE, startArray);
}
代码示例来源:origin: org.apache.lucene/lucene-core
/**
 * Finalizes the buffered values (sorted ids, ords, and the id-to-ord map) and returns a
 * document comparator backed by them.
 *
 * @param maxDoc passed through to {@code Sorter.getDocComparator}
 * @param sortField the sort field; asserted to be of type STRING
 * @return the comparator built over the buffered sorted doc values
 * @throws IOException if building the comparator fails
 */
@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
  assert sortField.getType().equals(SortField.Type.STRING);
  // The final* fields must not have been populated yet.
  assert finalSortedValues == null && finalOrdMap == null && finalOrds == null;

  final int numValues = hash.size();
  finalSortedValues = hash.sort();
  finalOrds = pending.build();

  // Invert the sorted array: finalSortedValues[ord] is an id, finalOrdMap[id] is its ord.
  finalOrdMap = new int[numValues];
  for (int sortedPos = 0; sortedPos < numValues; sortedPos++) {
    final int id = finalSortedValues[sortedPos];
    finalOrdMap[id] = sortedPos;
  }

  final SortedDocValues docValues = new BufferedSortedDocValues(
      hash, numValues, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
  return Sorter.getDocComparator(maxDoc, sortField, () -> docValues, () -> null);
}
代码示例来源:origin: org.apache.lucene/lucene-core
// Excerpt: add the current term attribute's bytes to the hash and keep the id add() returns.
int termID = bytesHash.add(termAtt.getBytesRef());
// The value returned by byteStart(termID) is not used in this excerpt.
bytesHash.byteStart(termID);
代码示例来源:origin: org.apache.lucene/lucene-grouping
/**
 * Switches to a new segment: loads its sorted doc values for {@code field} and rebuilds the
 * map from segment ordinals to group ids.
 *
 * @param readerContext the segment about to be searched
 * @throws IOException if the doc values cannot be loaded or looked up
 */
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
  docValues = DocValues.getSorted(readerContext.reader(), field);
  ordsToGroupIds.clear();

  // One BytesRef is reused for every lookup.
  final BytesRef groupValue = new BytesRef();
  for (int groupId = 0; groupId < values.size(); groupId++) {
    values.get(groupId, groupValue);
    final int segmentOrd = docValues.lookupTerm(groupValue);
    if (segmentOrd < 0) {
      // lookupTerm gave a negative result, so no mapping is recorded for this group.
      continue;
    }
    ordsToGroupIds.put(segmentOrd, groupId);
  }
}
代码示例来源:origin: org.apache.lucene/lucene-analyzers-common
// Excerpt: the source lines between these statements are elided.
BytesRef scratch1 = new BytesRef();
BytesRef scratch2 = new BytesRef();
} else {
// Encode the word form's flags into flagsScratch, then add the encoded bytes to flagLookup.
encodeFlags(flagsScratch, wordForm);
int ord = flagLookup.add(flagsScratch.get());
// NOTE(review): per the BytesRefHash#add contract, a negative result should mean the bytes
// were already present - confirm against the elided branch body.
if (ord < 0) {
代码示例来源:origin: sirensolutions/siren-join
/**
 * Adds every term of another {@link BytesRefTermsSet} to this set.
 *
 * @param terms the terms to add; must be a {@code BytesRefTermsSet}
 * @throws UnsupportedOperationException if {@code terms} is of any other type
 */
@Override
protected void addAll(TermsSet terms) {
  final boolean supported = terms instanceof BytesRefTermsSet;
  if (!supported) {
    throw new UnsupportedOperationException("Invalid type: BytesRefTermsSet expected.");
  }

  final BytesRefHash source = ((BytesRefTermsSet) terms).set;
  // One BytesRef is reused for each entry read from the source hash.
  final BytesRef term = new BytesRef();
  int id = 0;
  while (id < source.size()) {
    source.get(id, term);
    set.add(term);
    id++;
  }
}
代码示例来源:origin: pearson-enabling-technologies/elasticsearch-approx-plugin
// Excerpt: reset the counter hash when it was compacted or already holds entries.
if(_compacted || _counter.size() > 0) {
_counter.clear();
_counter.reinit();
_compacted = false;
// Read a length prefix, then that many bytes into scratch, and add them as one term.
final int length = in.readInt();
// NOTE(review): the return value of read() is ignored; if `in` can return fewer than
// `length` bytes, a partially filled term is added - confirm the stream guarantees a full read.
in.read(scratch, 0, length);
_counter.add(new BytesRef(scratch, 0, length));
代码示例来源:origin: pearson-enabling-technologies/elasticsearch-approx-plugin
// Excerpt: the source lines between these statements are elided.
throw new RuntimeException(e);
// Reusable BytesRef that hash.get() fills for each id.
final BytesRef scratch = new BytesRef();
for(int i = 0; i < ids.length; i++) {
final int id = ids[i];
// The first negative id ends the walk.
if(id < 0)
break;
hash.get(id, scratch);
try {
proc.consume(scratch);
hash.clear();
代码示例来源:origin: sirensolutions/siren-join
/**
 * Rebuilds this set from its serialized form: one flag byte, then the number of terms,
 * then the terms themselves.
 *
 * @param bytes the serialized set; its {@code offset} is advanced past the flag byte here
 */
private void readFromBytes(BytesRef bytes) {
  // Read pruned flag (a value of 1 means pruned) and step past it
  final byte prunedFlag = bytes.bytes[bytes.offset++];
  this.setIsPruned(prunedFlag == 1);

  // Read size of the set
  final int termCount = Bytes.readInt(bytes);

  // Read terms into a fresh hash backed by a newly tracked pool
  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  final BytesRef term = new BytesRef();
  for (int remaining = termCount; remaining > 0; remaining--) {
    Bytes.readBytesRef(bytes, term);
    set.add(term);
  }
}
代码示例来源:origin: org.apache.lucene/lucene-analyzers-common
// Excerpt: input cleaning follows the ignoreCase setting.
this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
// Register the empty flag set in flagLookup.
flagLookup.add(new BytesRef()); // no flags -> ord 0
代码示例来源:origin: org.apache.lucene/lucene-memory
/**
 * Returns sorted doc values for {@code field}, built from the entry at id 0 of the field's
 * binary value set.
 *
 * @param field the field name
 * @param docValuesType the doc values type the field is expected to have
 * @return the sorted doc values, or {@code null} when no matching info is found
 */
private SortedDocValues getSortedDocValues(String field, DocValuesType docValuesType) {
  final Info info = getInfoForExpectedDocValuesType(field, docValuesType);
  if (info == null) {
    return null;
  }
  final BytesRef first = info.binaryProducer.dvBytesValuesSet.get(0, new BytesRef());
  return sortedDocValues(first);
}
代码示例来源:origin: org.apache.lucene/lucene-core
/**
 * Resolves an ordinal to its bytes through the sorted id array.
 *
 * @param ord position in {@code sortedValues}
 * @return the shared {@code scratch} instance, filled with the bytes for {@code ord}
 */
@Override
public BytesRef lookupOrd(int ord) {
  assert ord >= 0 && ord < sortedValues.length;
  final int id = sortedValues[ord];
  assert id >= 0 && id < sortedValues.length;
  // The result lives in the shared scratch, so a later call overwrites it.
  hash.get(id, scratch);
  return scratch;
}
代码示例来源:origin: org.apache.lucene/lucene-memory
/**
 * Binary-searches {@code ords[low..high]} for the entry whose bytes in {@code hash} equal
 * {@code b}.
 *
 * @param b the bytes to look for
 * @param bytesRef scratch instance overwritten with each probed entry
 * @param low first index to consider (inclusive)
 * @param high last index to consider (inclusive)
 * @param hash the hash the ids in {@code ords} refer to
 * @param ords ids into {@code hash}; the search assumes they are ordered by their bytes
 * @return the matching index, or {@code -(insertionPoint + 1)} when there is no match
 */
private final int binarySearch(BytesRef b, BytesRef bytesRef, int low,
    int high, BytesRefHash hash, int[] ords) {
  while (low <= high) {
    // Unsigned shift keeps the midpoint correct even if low + high overflows.
    final int probe = (low + high) >>> 1;
    hash.get(ords[probe], bytesRef);
    final int order = bytesRef.compareTo(b);
    if (order == 0) {
      return probe;
    }
    if (order < 0) {
      low = probe + 1;
    } else {
      high = probe - 1;
    }
  }
  assert bytesRef.compareTo(b) != 0;
  return -(low + 1);
}
代码示例来源:origin: org.infinispan/infinispan-embedded-query
/**
 * Collapses the hash table and sorts it in place, using the comparator returned by
 * {@code BytesRef.getUTF8SortedAsUnicodeComparator()}.
 *
 * @return the sorted term ids, which are also stored in {@code this.sortedTermIDs}
 */
public int[] sortPostings() {
  final int[] sorted = bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  sortedTermIDs = sorted;
  return sorted;
}
代码示例来源:origin: org.apache.lucene/lucene-core
// Excerpt: reinit() is only called when the hash reports a non-zero size.
if (bytesHash.size() != 0) {
bytesHash.reinit();
代码示例来源:origin: harbby/presto-connectors
/**
 * Positions the enum on {@code text}, or on the next entry after it when there is no exact
 * match.
 *
 * @param text the term to seek to
 * @return {@code FOUND} on an exact match, {@code NOT_FOUND} when positioned on the
 *         successor, {@code END} when no entry follows
 */
@Override
public SeekStatus seekCeil(BytesRef text) {
  final int lastIndex = info.terms.size() - 1;
  termUpto = binarySearch(text, br, 0, lastIndex, info.terms, info.sortedTerms, BytesRef.getUTF8SortedAsUnicodeComparator());
  if (termUpto >= 0) {
    return SeekStatus.FOUND;
  }

  // not found; a negative result encodes the successor's index as -(index + 1)
  termUpto = -termUpto - 1;
  if (termUpto >= info.terms.size()) {
    return SeekStatus.END;
  }
  // Load the successor's bytes into br.
  info.terms.get(info.sortedTerms[termUpto], br);
  return SeekStatus.NOT_FOUND;
}
代码示例来源:origin: org.apache.lucene/lucene-grouping
/**
 * Replaces the tracked group values with those of {@code searchGroups} and switches to
 * second-pass mode.
 *
 * @param searchGroups the groups to track; a group whose value is {@code null} sets
 *        {@code includeEmpty} instead of being added to {@code values}
 */
@Override
public void setGroups(Collection<SearchGroup<BytesRef>> searchGroups) {
  // Empty the hash, then make it usable again before refilling it.
  values.clear();
  values.reinit();
  for (SearchGroup<BytesRef> group : searchGroups) {
    final BytesRef groupValue = group.groupValue;
    if (groupValue != null) {
      values.add(groupValue);
    } else {
      includeEmpty = true;
    }
  }
  secondPass = true;
}
}
代码示例来源:origin: org.apache.lucene/lucene-memory
/**
 * Positions the enum directly on the entry at sorted position {@code ord} and loads its
 * bytes into {@code br}.
 *
 * @param ord sorted position to seek to; asserted to be below the number of terms
 */
@Override
public void seekExact(long ord) {
  assert ord < info.terms.size();
  final int target = (int) ord;
  termUpto = target;
  info.terms.get(info.sortedTerms[target], br);
}
内容来源于网络,如有侵权,请联系作者删除!