Usage and Code Examples of the org.apache.lucene.util.BytesRefHash Class


This article collects a number of Java code examples for org.apache.lucene.util.BytesRefHash, showing how the class is used in practice. The examples come from selected projects on platforms such as GitHub, Stack Overflow, and Maven, and should serve as useful references. Details of the BytesRefHash class:
Package path: org.apache.lucene.util.BytesRefHash
Class name: BytesRefHash

Introduction to BytesRefHash

BytesRefHash is a special-purpose, hash-map-like data structure optimized for BytesRef instances. BytesRefHash maintains mappings of byte arrays to ids (Map<BytesRef,int>), storing the hashed bytes efficiently in continuous storage. The mapping to the id is encapsulated inside BytesRefHash, and the id is guaranteed to increase for each added BytesRef.

Note: a BytesRef instance passed to #add(BytesRef) must not be longer than ByteBlockPool#BYTE_BLOCK_SIZE - 2. The internal storage is limited to 2GB of total bytes.
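To make the contract above concrete, here is a minimal, self-contained sketch (BytesRefHash and BytesRef are the real Lucene classes; the demo class itself is only illustrative):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class BytesRefHashDemo {
 public static void main(String[] args) {
  BytesRefHash hash = new BytesRefHash(); // backed by a default ByteBlockPool

  int id0 = hash.add(new BytesRef("lucene")); // new value: ids start at 0 and increase
  int id1 = hash.add(new BytesRef("search")); // next new value: 1
  int dup = hash.add(new BytesRef("lucene")); // duplicate: returns -(id + 1), here -1

  if (dup < 0) {
   int existing = -dup - 1; // recover the id of the already-stored value
   assert existing == id0;
  }

  // get() fills a caller-supplied scratch instance instead of allocating
  BytesRef scratch = new BytesRef();
  hash.get(id1, scratch);
  System.out.println(scratch.utf8ToString() + " / unique values: " + hash.size());
 }
}

Most of the excerpts below follow exactly this pattern: add() to intern bytes, a negative return value to detect duplicates, and get(id, scratch) to read values back.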

Code Examples

Code example source: org.apache.lucene/lucene-core

@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
 final B builder = getTopLevelBuilder();
 final ParallelArraysTermCollector col = new ParallelArraysTermCollector();
 collectTerms(reader, query, col);
 
 final int size = col.terms.size();
 if (size > 0) {
  final int sort[] = col.terms.sort();
  final float[] boost = col.array.boost;
  final TermContext[] termStates = col.array.termState;
  for (int i = 0; i < size; i++) {
   final int pos = sort[i];
   final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef()));
   assert reader.docFreq(term) == termStates[pos].docFreq();
   addClause(builder, term, termStates[pos].docFreq(), boost[pos], termStates[pos]);
  }
 }
 return build(builder);
}
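In this rewrite, col.terms is a BytesRefHash that collected the matching terms; terms.sort() returns the term ids ordered by term bytes, so clauses are added in a deterministic term order, while boost and TermContext are looked up through the same ids in the parallel arrays.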

Code example source: org.apache.lucene/lucene-core

private void addOneValue(BytesRef value) {
 int termID = hash.add(value);
 if (termID < 0) {
  termID = -termID-1;
 } else {
  // reserve additional space for each unique value:
  // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
  //    TODO: can this same OOM happen in THPF?
  // 2. when flushing, we need 1 int per value (slot in the ordMap).
  iwBytesUsed.addAndGet(2 * Integer.BYTES);
 }
 
 pending.add(termID);
 updateBytesUsed();
}
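Two conventions of add() are visible here: a non-negative return is a newly assigned id (the only case where extra memory needs to be reserved), while a negative return means the value was already present and -termID-1 recovers its existing id.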

Code example source: org.apache.lucene/lucene-core

/** streamCount: how many streams this field stores per term.
 * E.g. doc(+freq) is 1 stream, prox+offset is a second. */
public TermsHashPerField(int streamCount, FieldInvertState fieldState, TermsHash termsHash, TermsHashPerField nextPerField, FieldInfo fieldInfo) {
 intPool = termsHash.intPool;
 bytePool = termsHash.bytePool;
 termBytePool = termsHash.termBytePool;
 docState = termsHash.docState;
 this.termsHash = termsHash;
 bytesUsed = termsHash.bytesUsed;
 this.fieldState = fieldState;
 this.streamCount = streamCount;
 numPostingInt = 2*streamCount;
 this.fieldInfo = fieldInfo;
 this.nextPerField = nextPerField;
 PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
 bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
}

Code example source: org.apache.lucene/lucene-core

@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
 assert sortField.getType().equals(SortField.Type.STRING);
 assert finalSortedValues == null && finalOrdMap == null && finalOrds == null;
 int valueCount = hash.size();
 finalSortedValues = hash.sort();
 finalOrds = pending.build();
 finalOrdMap = new int[valueCount];
 for (int ord = 0; ord < valueCount; ord++) {
  finalOrdMap[finalSortedValues[ord]] = ord;
 }
 final SortedDocValues docValues =
   new BufferedSortedDocValues(hash, valueCount, finalOrds, finalSortedValues, finalOrdMap,
     docsWithField.iterator());
 return Sorter.getDocComparator(maxDoc, sortField, () -> docValues, () -> null);
}
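hash.sort() returns term ids arranged in sorted term order; inverting that array into finalOrdMap yields, for each term id, its sorted ordinal, which is what the doc-values comparator consumes.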

Code example source: org.apache.lucene/lucene-core

int termID = bytesHash.add(termAtt.getBytesRef()); // intern the current token's bytes; negative means already seen
bytesHash.byteStart(termID); // start offset of this term's bytes in the block pool

Code example source: org.apache.lucene/lucene-grouping

@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
 this.docValues = DocValues.getSorted(readerContext.reader(), field);
 this.ordsToGroupIds.clear();
 BytesRef scratch = new BytesRef();
 for (int i = 0; i < values.size(); i++) {
  values.get(i, scratch);
  int ord = this.docValues.lookupTerm(scratch);
  if (ord >= 0)
   ordsToGroupIds.put(ord, i);
 }
}

Code example source: org.apache.lucene/lucene-analyzers-common

BytesRef scratch1 = new BytesRef();
BytesRef scratch2 = new BytesRef();
// ...
encodeFlags(flagsScratch, wordForm);
int ord = flagLookup.add(flagsScratch.get());
if (ord < 0) {
 // duplicate: recover the existing ord via the -(ord + 1) convention
 ord = (-ord) - 1;
}

Code example source: sirensolutions/siren-join

@Override
protected void addAll(TermsSet terms) {
 if (!(terms instanceof BytesRefTermsSet)) {
  throw new UnsupportedOperationException("Invalid type: BytesRefTermsSet expected.");
 }
 BytesRefHash input = ((BytesRefTermsSet) terms).set;
 BytesRef reusable = new BytesRef();
 for (int i = 0; i < input.size(); i++) {
  input.get(i, reusable);
  set.add(reusable);
 }
}

Code example source: pearson-enabling-technologies/elasticsearch-approx-plugin

if (_compacted || _counter.size() > 0) {
  _counter.clear();
  _counter.reinit(); // reinit() makes the hash usable again after clear()
  _compacted = false;
  final int length = in.readInt();
  in.read(scratch, 0, length);
  _counter.add(new BytesRef(scratch, 0, length)); // re-intern the deserialized bytes
}

Code example source: pearson-enabling-technologies/elasticsearch-approx-plugin

final BytesRef scratch = new BytesRef();
for (int i = 0; i < ids.length; i++) {
  final int id = ids[i];
  if (id < 0)
    break;
  hash.get(id, scratch); // fill scratch with the bytes stored under this id
  try {
    proc.consume(scratch);
  } catch (final Exception e) { // exception type reconstructed; the excerpt only showed the rethrow
    throw new RuntimeException(e);
  }
}
hash.clear();

Code example source: sirensolutions/siren-join

private void readFromBytes(BytesRef bytes) {
 // Read pruned flag
 this.setIsPruned(bytes.bytes[bytes.offset++] == 1);
 // Read size of the set
 int size = Bytes.readInt(bytes);
 // Read terms
 bytesUsed = Counter.newCounter();
 pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
 set = new BytesRefHash(pool);
 BytesRef reusable = new BytesRef();
 for (int i = 0; i < size; i++) {
  Bytes.readBytesRef(bytes, reusable);
  set.add(reusable);
 }
}
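Because the ByteBlockPool is created with a DirectTrackingAllocator, every block the pool allocates for the hashed bytes is charged to the Counter, so the memory footprint of the deserialized set stays observable to the caller.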

Code example source: org.apache.lucene/lucene-analyzers-common

this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
flagLookup.add(new BytesRef()); // no flags -> ord 0

Code example source: org.apache.lucene/lucene-memory

private SortedDocValues getSortedDocValues(String field, DocValuesType docValuesType) {
 Info info = getInfoForExpectedDocValuesType(field, docValuesType);
 if (info != null) {
  BytesRef value = info.binaryProducer.dvBytesValuesSet.get(0, new BytesRef());
  return sortedDocValues(value);
 } else {
  return null;
 }
}

Code example source: org.apache.lucene/lucene-core

@Override
public BytesRef lookupOrd(int ord) {
 assert ord >= 0 && ord < sortedValues.length;
 assert sortedValues[ord] >= 0 && sortedValues[ord] < sortedValues.length;
 hash.get(sortedValues[ord], scratch);
 return scratch;
}

Code example source: org.apache.lucene/lucene-memory

private final int binarySearch(BytesRef b, BytesRef bytesRef, int low,
  int high, BytesRefHash hash, int[] ords) {
 int mid = 0;
 while (low <= high) {
  mid = (low + high) >>> 1;
  hash.get(ords[mid], bytesRef);
  final int cmp = bytesRef.compareTo(b);
  if (cmp < 0) {
   low = mid + 1;
  } else if (cmp > 0) {
   high = mid - 1;
  } else {
   return mid;
  }
 }
 assert bytesRef.compareTo(b) != 0;
 return -(low + 1);
}
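The ords array here is expected to hold ids arranged in sorted term order (e.g. the output of a sort() call), so the binary search can compare the probe against hash.get(ords[mid], ...) and, like Arrays.binarySearch, return -(low + 1) as the insertion point when the term is absent.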

Code example source: org.infinispan/infinispan-embedded-query

/** Collapse the hash table and sort in-place; also sets
 * this.sortedTermIDs to the results */
public int[] sortPostings() {
 sortedTermIDs = bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
 return sortedTermIDs;
}
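Note that this excerpt (Lucene bundled via Infinispan) uses the older sort(Comparator) signature; recent Lucene versions expose only a no-argument sort() that always orders in unicode (UTF-8 byte) order, as in the getDocComparator example above.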

Code example source: org.apache.lucene/lucene-core

if (bytesHash.size() != 0) {
 // fragment: the hash held entries from a previous use; reinitialize it
 bytesHash.reinit();
}

Code example source: harbby/presto-connectors

@Override
public SeekStatus seekCeil(BytesRef text) {
 termUpto = binarySearch(text, br, 0, info.terms.size()-1, info.terms, info.sortedTerms, BytesRef.getUTF8SortedAsUnicodeComparator());
 if (termUpto < 0) { // not found; choose successor
  termUpto = -termUpto-1;
  if (termUpto >= info.terms.size()) {
   return SeekStatus.END;
  } else {
   info.terms.get(info.sortedTerms[termUpto], br);
   return SeekStatus.NOT_FOUND;
  }
 } else {
  return SeekStatus.FOUND;
 }
}

Code example source: org.apache.lucene/lucene-grouping

@Override
 public void setGroups(Collection<SearchGroup<BytesRef>> searchGroups) {
  this.values.clear();
  this.values.reinit();
  for (SearchGroup<BytesRef> sg : searchGroups) {
   if (sg.groupValue == null)
    includeEmpty = true;
   else
    this.values.add(sg.groupValue);
  }
  this.secondPass = true;
 }
}

Code example source: org.apache.lucene/lucene-memory

@Override
public void seekExact(long ord) {
 assert ord < info.terms.size();
 termUpto = (int) ord;
 info.terms.get(info.sortedTerms[termUpto], br);
}
