
x33g5p2x  于2022-02-01 转载在 其他  



[英]Calculates the optimum initial size for a hash table given the maximum number of elements it will need to hold. The optimum size is the smallest size that is guaranteed not to result in any rehash/table-resize operations.


代码示例来源:origin: broadgsa/gatk

/**
 * Returns the {@code contigNames} set created for the given sequence dictionary.
 *
 * <p>The set is a {@link HashSet} whose initial capacity is
 * {@code Utils.optimumHashSize(dict.size())}, so it is sized up front for one entry per
 * record in the dictionary.
 *
 * <p>NOTE(review): this excerpt is truncated. The body of the {@code for} loop and the
 * method's closing brace are missing, so as written the {@code return} statement would be
 * parsed as the loop body. Presumably each record's sequence name was added to
 * {@code contigNames} inside the loop; confirm against the original source.
 *
 * @param dict sequence dictionary whose {@code size()} and {@code getSequences()} are read
 * @return the {@code contigNames} set
 */
public static Set<String> getContigNames(SAMSequenceDictionary dict) {
  // Pre-size the set from the number of dictionary entries.
  final Set<String> contigNames = new HashSet<String>(Utils.optimumHashSize(dict.size()));
  // NOTE(review): loop body missing from this excerpt (see the method comment).
  for (SAMSequenceRecord dictionaryEntry : dict.getSequences())
  return contigNames;

代码示例来源:origin: broadgsa/gatk

/**
 * Finds the names of any contigs indexed differently in the two sequence dictionaries that also
 * occur in the provided set of intervals.
 *
 * @param intervals GenomeLocSortedSet containing the intervals to check
 * @param dict1 first sequence dictionary
 * @param dict2 second sequence dictionary
 * @return a Set of the names of the contigs indexed differently in dict1 vs dict2 that also
 *         occur in the provided intervals, or an empty Set if there are no such contigs
 */
private static Set<String> findMisindexedContigsInIntervals( final GenomeLocSortedSet intervals,
                               final SAMSequenceDictionary dict1,
                               final SAMSequenceDictionary dict2 ) {
  // Candidate names: the result of getDifferentlyIndexedCommonContigs applied to the
  // contigs that getCommonContigsByName reports for dict1 and dict2.
  final Set<String> differentlyIndexedCommonContigs = getDifferentlyIndexedCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2);
  // Result set keeps insertion order (LinkedHashSet) and is pre-sized for the case where
  // every candidate name ends up in it.
  final Set<String> misindexedContigsInIntervals = new LinkedHashSet<String>(Utils.optimumHashSize(differentlyIndexedCommonContigs.size()));
  // We know differentlyIndexedCommonContigs is a HashSet, so this loop is O(intervals)
  for ( GenomeLoc interval : intervals ) {
    // Only intervals whose contig is one of the candidate names are of interest.
    if ( differentlyIndexedCommonContigs.contains(interval.getContig()) ) {
  // NOTE(review): this excerpt is truncated. The body of the if statement and the closing
  // braces of the if, the for loop and the method are missing. Presumably the matching
  // contig name was added to misindexedContigsInIntervals; confirm against the original source.
  return misindexedContigsInIntervals;

代码示例来源:origin: broadgsa/gatk

/**
 * Gets the set of names of the contigs found in both sequence dictionaries that have different indices
 * in the two dictionaries.
 *
 * @param commonContigs Set of names of the contigs common to both dictionaries
 * @param dict1 first sequence dictionary
 * @param dict2 second sequence dictionary
 * @return a Set containing the names of the common contigs indexed differently in dict1 vs. dict2,
 *         or an empty Set if there are no such contigs
 */
private static Set<String> getDifferentlyIndexedCommonContigs( final Set<String> commonContigs,
                                final SAMSequenceDictionary dict1,
                                final SAMSequenceDictionary dict2 ) {
  // Result set keeps insertion order (LinkedHashSet) and is pre-sized for the case where
  // every common contig ends up in it.
  final Set<String> differentlyIndexedCommonContigs = new LinkedHashSet<String>(Utils.optimumHashSize(commonContigs.size()));
  for ( String commonContig : commonContigs ) {
    // Compare the sequence index recorded for this contig name in each dictionary.
    // NOTE(review): getSequence(commonContig) is dereferenced directly, so the name is
    // assumed to be present in both dictionaries; confirm against the callers.
    if ( dict1.getSequence(commonContig).getSequenceIndex() != dict2.getSequence(commonContig).getSequenceIndex() ) {
  // NOTE(review): this excerpt is truncated. The body of the if statement and the closing
  // braces of the if, the for loop and the method are missing. Presumably the contig name
  // was added to differentlyIndexedCommonContigs; confirm against the original source.
  return differentlyIndexedCommonContigs;

代码示例来源:origin: broadgsa/gatk

/**
 * Builds a map of annotations from this object's fields, keyed by
 * {@code InfoFieldKey} key names.
 *
 * <p>The map is a {@link LinkedHashMap}, so iteration follows the order of the
 * {@code addAnnotation} calls below. Its initial capacity is
 * {@code Utils.optimumHashSize(InfoFieldKey.values().length)}, i.e. sized for one entry
 * per {@code InfoFieldKey} constant.
 *
 * <p>NOTE(review): {@code effect}, {@code impact} and {@code functionalClass} are
 * dereferenced via {@code toString()}, so they are assumed to be non-null here; confirm.
 * Whether {@code addAnnotation} skips some values is not visible in this excerpt. The
 * method's closing brace is also missing from the excerpt.
 *
 * @return the populated annotations map
 */
public Map<String, Object> getAnnotations() {
  Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(InfoFieldKey.values().length));
  // These three values are passed in their String form.
  addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
  addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
  addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), functionalClass.toString());
  // The remaining values are passed through as-is.
  addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
  addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
  addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
  addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
  addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
  addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
  return annotations;
