本文整理了Java中org.apache.hadoop.util.hash.Hash类的一些代码示例,展示了Hash类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Hash类的具体详情如下:
包路径:org.apache.hadoop.util.hash.Hash
类名称:Hash
[英]This class represents a common API for hashing functions.
[中]此类表示哈希函数的通用API。
代码示例来源:origin: org.apache.hadoop/hadoop-common
/**
 * Hashes the whole input array, delegating to the three-argument
 * {@code hash} overload with the array's full length and a seed of -1.
 *
 * @param bytes input bytes; all of them are hashed
 * @return hash value
 */
public int hash(byte[] bytes) {
  final int length = bytes.length;
  final int seed = -1;
  return hash(bytes, length, seed);
}
代码示例来源:origin: org.apache.hadoop/hadoop-common
/**
 * Reads the hash type name stored in the configuration under
 * {@code HADOOP_UTIL_HASH_TYPE_KEY} (passing
 * {@code HADOOP_UTIL_HASH_TYPE_DEFAULT} as the default) and converts it
 * to its symbolic constant via {@code parseHashType}.
 *
 * @param conf configuration to read the hash type name from
 * @return one of the predefined hash type constants
 */
public static int getHashType(Configuration conf) {
  return parseHashType(
      conf.get(HADOOP_UTIL_HASH_TYPE_KEY, HADOOP_UTIL_HASH_TYPE_DEFAULT));
}
代码示例来源:origin: org.apache.hadoop/hadoop-common
/**
 * Returns the singleton hash function selected by the configuration.
 * The type is resolved with {@code getHashType(conf)} and then handed to
 * the {@code getInstance(int)} overload.
 *
 * @param conf current configuration
 * @return the hash function instance for the configured type, or null
 *         if the type is invalid
 */
public static Hash getInstance(Configuration conf) {
  return getInstance(getHashType(conf));
}
代码示例来源:origin: ch.cern.hadoop/hadoop-common
int iterations = 30;
assertTrue("testHash jenkins error !!!",
Hash.JENKINS_HASH == Hash.parseHashType("jenkins"));
assertTrue("testHash murmur error !!!",
Hash.MURMUR_HASH == Hash.parseHashType("murmur"));
assertTrue("testHash undefined",
Hash.INVALID_HASH == Hash.parseHashType("undefined"));
assertTrue("testHash", MurmurHash.getInstance() == Hash.getInstance(cfg));
JenkinsHash.getInstance() == Hash.getInstance(cfg));
MurmurHash.getInstance() == Hash.getInstance(cfg));
JenkinsHash.getInstance() == Hash.getInstance(Hash.JENKINS_HASH));
assertTrue("testHash error murmur getInstance !!!",
MurmurHash.getInstance() == Hash.getInstance(Hash.MURMUR_HASH));
Hash.getInstance(Hash.INVALID_HASH));
int murmurHash = Hash.getInstance(Hash.MURMUR_HASH).hash(LINE.getBytes());
for (int i = 0; i < iterations; i++) {
assertTrue("multiple evaluation murmur hash error !!!",
murmurHash == Hash.getInstance(Hash.MURMUR_HASH)
.hash(LINE.getBytes()));
murmurHash = Hash.getInstance(Hash.MURMUR_HASH).hash(LINE.getBytes(), 67);
for (int i = 0; i < iterations; i++) {
assertTrue(
代码示例来源:origin: org.apache.hadoop/hadoop-common
/**
* Constructor.
* <p>
* Builds a hash function that must obey to a given maximum number of returned values and a highest value.
* @param maxValue The maximum highest returned value.
* @param nbHash The number of resulting hashed values.
* @param hashType type of the hashing function (see {@link Hash}).
*/
public HashFunction(int maxValue, int nbHash, int hashType) {
if (maxValue <= 0) {
throw new IllegalArgumentException("maxValue must be > 0");
}
if (nbHash <= 0) {
throw new IllegalArgumentException("nbHash must be > 0");
}
this.maxValue = maxValue;
this.nbHash = nbHash;
this.hashFunction = Hash.getInstance(hashType);
if (this.hashFunction == null)
throw new IllegalArgumentException("hashType must be known");
}
代码示例来源:origin: org.apache.hadoop/hadoop-common
/**
 * Reads the Bloom filter sizing parameters from the configuration,
 * derives the bit-vector size from them, and creates the
 * {@code DynamicBloomFilter} stored in {@code bloomFilter}.
 *
 * @param conf configuration supplying the expected key count, the error
 *             rate and the hash type
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt(
      IO_MAPFILE_BLOOM_SIZE_KEY, IO_MAPFILE_BLOOM_SIZE_DEFAULT);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat(
      IO_MAPFILE_BLOOM_ERROR_RATE_KEY, IO_MAPFILE_BLOOM_ERROR_RATE_DEFAULT);
  // Widen to double BEFORE multiplying: casting the finished product
  // (as the previous code did) lets HASH_COUNT * numKeys wrap around in
  // integer arithmetic for large key counts.
  // NOTE(review): assumes HASH_COUNT and numKeys are ints (declarations
  // are outside this excerpt) - confirm.
  double negatedBits = -((double) HASH_COUNT * numKeys);
  vectorSize = (int) Math.ceil(negatedBits /
      Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
代码示例来源:origin: com.github.jiayuhan-it/hadoop-common
int iterations = 30;
assertTrue("testHash jenkins error !!!",
Hash.JENKINS_HASH == Hash.parseHashType("jenkins"));
assertTrue("testHash murmur error !!!",
Hash.MURMUR_HASH == Hash.parseHashType("murmur"));
assertTrue("testHash undefined",
Hash.INVALID_HASH == Hash.parseHashType("undefined"));
assertTrue("testHash", MurmurHash.getInstance() == Hash.getInstance(cfg));
JenkinsHash.getInstance() == Hash.getInstance(cfg));
MurmurHash.getInstance() == Hash.getInstance(cfg));
JenkinsHash.getInstance() == Hash.getInstance(Hash.JENKINS_HASH));
assertTrue("testHash error murmur getInstance !!!",
MurmurHash.getInstance() == Hash.getInstance(Hash.MURMUR_HASH));
Hash.getInstance(Hash.INVALID_HASH));
int murmurHash = Hash.getInstance(Hash.MURMUR_HASH).hash(LINE.getBytes());
for (int i = 0; i < iterations; i++) {
assertTrue("multiple evaluation murmur hash error !!!",
murmurHash == Hash.getInstance(Hash.MURMUR_HASH)
.hash(LINE.getBytes()));
murmurHash = Hash.getInstance(Hash.MURMUR_HASH).hash(LINE.getBytes(), 67);
for (int i = 0; i < iterations; i++) {
assertTrue(
代码示例来源:origin: com.github.jiayuhan-it/hadoop-common
/**
* Constructor.
* <p>
* Builds a hash function that must obey to a given maximum number of returned values and a highest value.
* @param maxValue The maximum highest returned value.
* @param nbHash The number of resulting hashed values.
* @param hashType type of the hashing function (see {@link Hash}).
*/
public HashFunction(int maxValue, int nbHash, int hashType) {
if (maxValue <= 0) {
throw new IllegalArgumentException("maxValue must be > 0");
}
if (nbHash <= 0) {
throw new IllegalArgumentException("nbHash must be > 0");
}
this.maxValue = maxValue;
this.nbHash = nbHash;
this.hashFunction = Hash.getInstance(hashType);
if (this.hashFunction == null)
throw new IllegalArgumentException("hashType must be known");
}
代码示例来源:origin: ch.cern.hadoop/hadoop-common
/**
 * Reads the Bloom filter sizing parameters from the configuration,
 * derives the bit-vector size from them, and creates the
 * {@code DynamicBloomFilter} stored in {@code bloomFilter}.
 *
 * @param conf configuration supplying the expected key count
 *             ("io.mapfile.bloom.size"), the error rate
 *             ("io.mapfile.bloom.error.rate") and the hash type
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Widen to double BEFORE multiplying: casting the finished product
  // (as the previous code did) lets HASH_COUNT * numKeys wrap around in
  // integer arithmetic for large key counts.
  // NOTE(review): assumes HASH_COUNT and numKeys are ints (declarations
  // are outside this excerpt) - confirm.
  double negatedBits = -((double) HASH_COUNT * numKeys);
  vectorSize = (int) Math.ceil(negatedBits /
      Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
代码示例来源:origin: org.apache.hadoop/hadoop-common
/**
 * Hashes the whole input array with a caller-supplied seed, delegating
 * to the three-argument {@code hash} overload with the array's full length.
 *
 * @param bytes input bytes; all of them are hashed
 * @param initval seed value
 * @return hash value
 */
public int hash(byte[] bytes, int initval) {
  final int length = bytes.length;
  return hash(bytes, length, initval);
}
代码示例来源:origin: ch.cern.hadoop/hadoop-common
/**
 * Returns the singleton hash function selected by the configuration.
 * The type is resolved with {@code getHashType(conf)} and then handed to
 * the {@code getInstance(int)} overload.
 *
 * @param conf current configuration
 * @return the hash function instance for the configured type, or null
 *         if the type is invalid
 */
public static Hash getInstance(Configuration conf) {
  return getInstance(getHashType(conf));
}
代码示例来源:origin: apache/accumulo
.ceil(-HASH_COUNT * numKeys / Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
Hash.parseHashType(acuconf.get(Property.TABLE_BLOOM_HASHTYPE)), numKeys);
代码示例来源:origin: io.hops/hadoop-common
/**
* Constructor.
* <p>
* Builds a hash function that must obey to a given maximum number of returned values and a highest value.
* @param maxValue The maximum highest returned value.
* @param nbHash The number of resulting hashed values.
* @param hashType type of the hashing function (see {@link Hash}).
*/
public HashFunction(int maxValue, int nbHash, int hashType) {
if (maxValue <= 0) {
throw new IllegalArgumentException("maxValue must be > 0");
}
if (nbHash <= 0) {
throw new IllegalArgumentException("nbHash must be > 0");
}
this.maxValue = maxValue;
this.nbHash = nbHash;
this.hashFunction = Hash.getInstance(hashType);
if (this.hashFunction == null)
throw new IllegalArgumentException("hashType must be known");
}
代码示例来源:origin: com.github.jiayuhan-it/hadoop-common
/**
 * Reads the Bloom filter sizing parameters from the configuration,
 * derives the bit-vector size from them, and creates the
 * {@code DynamicBloomFilter} stored in {@code bloomFilter}.
 *
 * @param conf configuration supplying the expected key count
 *             ("io.mapfile.bloom.size"), the error rate
 *             ("io.mapfile.bloom.error.rate") and the hash type
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Widen to double BEFORE multiplying: casting the finished product
  // (as the previous code did) lets HASH_COUNT * numKeys wrap around in
  // integer arithmetic for large key counts.
  // NOTE(review): assumes HASH_COUNT and numKeys are ints (declarations
  // are outside this excerpt) - confirm.
  double negatedBits = -((double) HASH_COUNT * numKeys);
  vectorSize = (int) Math.ceil(negatedBits /
      Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
代码示例来源:origin: org.apache.hadoop/hadoop-common
/**
 * Derives {@code nbHash} integers from a key. The hashes are chained:
 * the first uses a seed of 0 and each later one is seeded with the raw
 * hash produced just before it. Every raw hash is reduced with
 * {@code Math.abs(raw % maxValue)} before being stored.
 *
 * @param k The key to hash.
 * @return The array of hashed values, one per configured hash.
 * @throws NullPointerException if the key's byte buffer is null
 * @throws IllegalArgumentException if the key's byte buffer is empty
 */
public int[] hash(Key k) {
  final byte[] keyBytes = k.getBytes();
  if (keyBytes == null) {
    throw new NullPointerException("buffer reference is null");
  }
  if (keyBytes.length == 0) {
    throw new IllegalArgumentException("key length must be > 0");
  }

  final int[] hashes = new int[nbHash];
  int seed = 0;
  int index = 0;
  while (index < nbHash) {
    // The unreduced hash becomes the seed for the next round.
    seed = hashFunction.hash(keyBytes, seed);
    hashes[index] = Math.abs(seed % maxValue);
    index++;
  }
  return hashes;
}
}
代码示例来源:origin: com.github.jiayuhan-it/hadoop-common
/**
 * Returns the singleton hash function selected by the configuration.
 * The type is resolved with {@code getHashType(conf)} and then handed to
 * the {@code getInstance(int)} overload.
 *
 * @param conf current configuration
 * @return the hash function instance for the configured type, or null
 *         if the type is invalid
 */
public static Hash getInstance(Configuration conf) {
  return getInstance(getHashType(conf));
}
代码示例来源:origin: ch.cern.hadoop/hadoop-common
/**
 * Reads the hash type name stored in the configuration under
 * "hadoop.util.hash.type" (passing "murmur" as the default) and converts
 * it to its symbolic constant via {@code parseHashType}.
 *
 * @param conf configuration to read the hash type name from
 * @return one of the predefined hash type constants
 */
public static int getHashType(Configuration conf) {
  return parseHashType(conf.get("hadoop.util.hash.type", "murmur"));
}
代码示例来源:origin: ch.cern.hadoop/hadoop-common
/**
* Constructor.
* <p>
* Builds a hash function that must obey to a given maximum number of returned values and a highest value.
* @param maxValue The maximum highest returned value.
* @param nbHash The number of resulting hashed values.
* @param hashType type of the hashing function (see {@link Hash}).
*/
public HashFunction(int maxValue, int nbHash, int hashType) {
if (maxValue <= 0) {
throw new IllegalArgumentException("maxValue must be > 0");
}
if (nbHash <= 0) {
throw new IllegalArgumentException("nbHash must be > 0");
}
this.maxValue = maxValue;
this.nbHash = nbHash;
this.hashFunction = Hash.getInstance(hashType);
if (this.hashFunction == null)
throw new IllegalArgumentException("hashType must be known");
}
代码示例来源:origin: com.facebook.hadoop/hadoop-core
/**
 * Reads the Bloom filter sizing parameters from the configuration,
 * derives the bit-vector size from them, and creates the
 * {@code DynamicBloomFilter} stored in {@code bloomFilter}.
 *
 * @param conf configuration supplying the expected key count
 *             ("io.mapfile.bloom.size"), the error rate
 *             ("io.mapfile.bloom.error.rate") and the hash type
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Widen to double BEFORE multiplying: casting the finished product
  // (as the previous code did) lets HASH_COUNT * numKeys wrap around in
  // integer arithmetic for large key counts.
  // NOTE(review): assumes HASH_COUNT and numKeys are ints (declarations
  // are outside this excerpt) - confirm.
  double negatedBits = -((double) HASH_COUNT * numKeys);
  vectorSize = (int) Math.ceil(negatedBits /
      Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
代码示例来源:origin: h2oai/h2o-2
for (int i = 0; i < numcat; ++i) {
ByteBuffer buf = ByteBuffer.allocate(4);
int hashval = murmur.hash(buf.putInt(cats[i]).array(), 4, (int)params.seed); // turn horizontalized categorical integer into another integer, based on seed
内容来源于网络,如有侵权,请联系作者删除!