package com.dy.common.util;
|
|
/**
|
* MurmurHash算法:高运算性能,低碰撞率,由Austin Appleby创建于2008年,
|
* 现已应用到Hadoop、libstdc++、nginx、libmemcached等开源系统。2011年
|
* Appleby被Google雇佣,随后Google推出其变种的CityHash算法。
|
*
|
* 官方网站:https://sites.google.com/site/murmurhash/
|
*
|
* MurmurHash算法,自称超级快的hash算法,是FNV的4-5倍。官方数据如下:
|
*
|
* OneAtATime – 354.163715 mb/sec
|
* FNV – 443.668038 mb/sec
|
* SuperFastHash – 985.335173 mb/sec
|
* lookup3 – 988.080652 mb/sec
|
* MurmurHash 1.0 – 1363.293480 mb/sec
|
* MurmurHash 2.0 – 2056.885653 mb/sec
|
*
|
* 但也有文章声称,只有当key的长度大于10字节的时候,MurmurHash的运算速
|
* 度才快于DJB。从计算速度上来看,MurmurHash只适用于已知长度的、长度比
|
* 较长的字符。
|
*
|
* 哈希值分布非常均匀,即低碰撞率
|
* 比Crc16更为均匀
|
*/
|
public final class MurmurHash {
|
|
private byte[] toBytesWithoutEncoding(String str) {
|
int len = str.length();
|
int pos = 0;
|
byte[] buf = new byte[len << 1];
|
for (int i = 0; i < len; i++) {
|
char c = str.charAt(i);
|
buf[pos++] = (byte) (c & 0xFF);
|
buf[pos++] = (byte) (c >> 8);
|
}
|
return buf;
|
}
|
|
/**
|
* 刘润玉增加方法于2016-12-02
|
* @param data
|
* @param length
|
* @return
|
*/
|
public int hash16_plus(final byte[] data, int length) {
|
int hash = hash32(data, length, 0x9747b28c);
|
hash = hash % 65535 ;
|
if(hash < 0){
|
hash = - hash ;
|
}
|
return hash ;
|
}
|
/**
|
* 刘润玉增加方法于2016-12-02
|
* @param data
|
* @return
|
*/
|
public int hash16_plus(final String data) {
|
byte[] bytes = toBytesWithoutEncoding(data);
|
int hash = hash32(bytes, bytes.length, 0x9747b28c);
|
hash = hash % 65535 ;
|
if(hash < 0){
|
hash = - hash ;
|
}
|
return hash ;
|
}
|
|
/**
|
* Generates 32 bit hash from byte array with default seed value.
|
* @param data byte array to hash
|
* @param length length of the array to hash
|
* @return 32 bit hash of the given array
|
*/
|
public int hash32(final byte[] data, int length) {
|
return hash32(data, length, 0x9747b28c);
|
}
|
|
public int hash32(final String data) {
|
byte[] bytes = toBytesWithoutEncoding(data);
|
return hash32(bytes, bytes.length, 0x9747b28c);
|
}
|
|
/**
|
* Generates 64 bit hash from byte array with default seed value.
|
* @param data byte array to hash
|
* @param length length of the array to hash
|
* @return 64 bit hash of the given string
|
*/
|
public long hash64(final byte[] data, int length) {
|
return hash64(data, length, 0xe17a1465);
|
}
|
|
|
public long hash64(final String data) {
|
byte[] bytes = toBytesWithoutEncoding(data);
|
return hash64(bytes, bytes.length);
|
}
|
/**
|
* Generates 32 bit hash from byte array of the given length and seed.
|
* @param data byte array to hash
|
* @param length length of the array
|
* @param seed initial seed value
|
* @return 32 bit hash of the given array
|
*/
|
public int hash32(final byte[] data, int length, int seed) {
|
// 'm' and 'r' are mixing constants generated offline.
|
// They're not really 'magic', they just happen to work well.
|
final int m = 0x5bd1e995;
|
final int r = 24;
|
// Initialize the hash to a random value
|
int h = seed ^ length;
|
int length4 = length / 4;
|
for (int i = 0; i < length4; i++) {
|
final int i4 = i * 4;
|
int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8)
|
+ ((data[i4 + 2] & 0xff) << 16)
|
+ ((data[i4 + 3] & 0xff) << 24);
|
k *= m;
|
k ^= k >>> r;
|
k *= m;
|
h *= m;
|
h ^= k;
|
}
|
// Handle the last few bytes of the input array
|
switch (length % 4) {
|
case 3:
|
h ^= (data[(length & ~3) + 2] & 0xff) << 16;
|
case 2:
|
h ^= (data[(length & ~3) + 1] & 0xff) << 8;
|
case 1:
|
h ^= (data[length & ~3] & 0xff);
|
h *= m;
|
}
|
h ^= h >>> 13;
|
h *= m;
|
h ^= h >>> 15;
|
return h;
|
}
|
/**
|
* Generates 64 bit hash from byte array of the given length and seed.
|
* @param data byte array to hash
|
* @param length length of the array to hash
|
* @param seed initial seed value
|
* @return 64 bit hash of the given array
|
*/
|
public long hash64(final byte[] data, int length, int seed) {
|
final long m = 0xc6a4a7935bd1e995L;
|
final int r = 47;
|
long h = (seed & 0xffffffffl) ^ (length * m);
|
int length8 = length / 8;
|
for (int i = 0; i < length8; i++) {
|
final int i8 = i * 8;
|
long k = ((long) data[i8 + 0] & 0xff)
|
+ (((long) data[i8 + 1] & 0xff) << 8)
|
+ (((long) data[i8 + 2] & 0xff) << 16)
|
+ (((long) data[i8 + 3] & 0xff) << 24)
|
+ (((long) data[i8 + 4] & 0xff) << 32)
|
+ (((long) data[i8 + 5] & 0xff) << 40)
|
+ (((long) data[i8 + 6] & 0xff) << 48)
|
+ (((long) data[i8 + 7] & 0xff) << 56);
|
k *= m;
|
k ^= k >>> r;
|
k *= m;
|
h ^= k;
|
h *= m;
|
}
|
switch (length % 8) {
|
case 7:
|
h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
|
case 6:
|
h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
|
case 5:
|
h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
|
case 4:
|
h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
|
case 3:
|
h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
|
case 2:
|
h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
|
case 1:
|
h ^= (long) (data[length & ~7] & 0xff);
|
h *= m;
|
}
|
;
|
h ^= h >>> r;
|
h *= m;
|
h ^= h >>> r;
|
return h;
|
}
|
public static void main(String[] args) {
|
test1() ;
|
test2() ;
|
}
|
|
public static void test1() {
|
String regionNum = "110000" ;
|
String temp = "" ;
|
String rtuAddr = "" ;
|
|
int total0 = 0 ;
|
int total1 = 0 ;
|
int total2 = 0 ;
|
int total3 = 0 ;
|
int total4 = 0 ;
|
int total5 = 0 ;
|
int totalx = 0 ;
|
int totaly = 0 ;
|
MurmurHash mhash = new MurmurHash() ;
|
Long start = System.currentTimeMillis() ;
|
for(int i = 0 ; i < 10000; i++){
|
temp = "" + i ;
|
while(temp.length() < 5){
|
temp = "0" + temp ;
|
}
|
rtuAddr = regionNum + temp ;
|
int hash = mhash.hash16_plus(rtuAddr) ;
|
if(hash < 0){
|
total0++ ;
|
}
|
if(hash >= 0 && hash < 1000){
|
total1++ ;
|
}
|
if(hash >= 1000 && hash < 2000){
|
total2++ ;
|
}
|
if(hash >= 2000 && hash < 3000){
|
total3++ ;
|
}
|
if(hash >= 3000 && hash < 4000){
|
total4++ ;
|
}
|
if(hash >= 4000 && hash < 5000){
|
total5++ ;
|
}
|
if(hash >= 64535 && hash < 65535){
|
totalx++ ;
|
}
|
if(hash > 65535){
|
totaly++ ;
|
}
|
//System.out.println(rtuAddr + "-" + crc);
|
}
|
Long end = System.currentTimeMillis() ;
|
System.out.println("用时" + ":" + (end - start));
|
System.out.println("0以下" + ":" + total0);
|
System.out.println("0-1000" + ":" + total1);
|
System.out.println("1000-2000" + ":" + total2);
|
System.out.println("2000-3000" + ":" + total3);
|
System.out.println("3000-4000" + ":" + total4);
|
System.out.println("5000-6000" + ":" + total5);
|
System.out.println("64535-65535" + ":" + totalx);
|
System.out.println("65535以上" + ":" + totaly);
|
System.out.println("=================");
|
}
|
public static void test2() {
|
String regionNum = "110000" ;
|
String temp = "" ;
|
String rtuAddr = "" ;
|
|
int total1 = 0 ;
|
int total2 = 0 ;
|
int total3 = 0 ;
|
int total4 = 0 ;
|
int total5 = 0 ;
|
MurmurHash mhash = new MurmurHash() ;
|
Long start = System.currentTimeMillis() ;
|
for(int i = 0 ; i < 10000; i++){
|
temp = "" ;
|
temp = "" + i ;
|
while(temp.length() < 5){
|
temp = "0" + temp ;
|
}
|
rtuAddr = regionNum + temp ;
|
int hash = mhash.hash32(rtuAddr) ;
|
if(hash > 0 && hash < 10000000){
|
total1++ ;
|
}
|
if(hash >= 10000000 && hash < 20000000){
|
total2++ ;
|
}
|
if(hash >= 20000000 && hash < 30000000){
|
total3++ ;
|
}
|
if(hash >= 30000000 && hash < 40000000){
|
total4++ ;
|
}
|
if(hash >= 40000000 && hash < 50000000){
|
total5++ ;
|
}
|
//System.out.println(rtuAddr + "-" + crc);
|
}
|
Long end = System.currentTimeMillis() ;
|
System.out.println("用时" + ":" + (end - start));
|
System.out.println("0-10000000" + ":" + total1);
|
System.out.println("10000000-20000000" + ":" + total2);
|
System.out.println("20000000-30000000" + ":" + total3);
|
System.out.println("30000000-40000000" + ":" + total4);
|
System.out.println("50000000-60000000" + ":" + total5);
|
System.out.println("=================");
|
}
|
}
|