New file |
| | |
| | | package com.dy.common.util; |
| | | |
| | | /** |
| | | * MurmurHash算法:高运算性能,低碰撞率,由Austin Appleby创建于2008年, |
| | | * 现已应用到Hadoop、libstdc++、nginx、libmemcached等开源系统。2011年 |
| | | * Appleby被Google雇佣,随后Google推出其变种的CityHash算法。 |
| | | * |
| | | * 官方网站:https://sites.google.com/site/murmurhash/ |
| | | * |
| | | * MurmurHash算法,自称超级快的hash算法,是FNV的4-5倍。官方数据如下: |
| | | * |
| | | * OneAtATime – 354.163715 mb/sec |
| | | * FNV – 443.668038 mb/sec |
| | | * SuperFastHash – 985.335173 mb/sec |
| | | * lookup3 – 988.080652 mb/sec |
| | | * MurmurHash 1.0 – 1363.293480 mb/sec |
| | | * MurmurHash 2.0 – 2056.885653 mb/sec |
| | | * |
| | | * 但也有文章声称,只有当key的长度大于10字节的时候,MurmurHash的运算速 |
| | | * 度才快于DJB。从计算速度上来看,MurmurHash只适用于已知长度的、长度比 |
| | | * 较长的字符。 |
| | | * |
| | | * 哈希值分布非常均匀,即低碰撞率 |
| | | * 比Crc16更为均匀 |
| | | */ |
| | | public final class MurmurHash { |
| | | |
| | | private byte[] toBytesWithoutEncoding(String str) { |
| | | int len = str.length(); |
| | | int pos = 0; |
| | | byte[] buf = new byte[len << 1]; |
| | | for (int i = 0; i < len; i++) { |
| | | char c = str.charAt(i); |
| | | buf[pos++] = (byte) (c & 0xFF); |
| | | buf[pos++] = (byte) (c >> 8); |
| | | } |
| | | return buf; |
| | | } |
| | | |
| | | /** |
| | | * 刘润玉增加方法于2016-12-02 |
| | | * @param data |
| | | * @param length |
| | | * @return |
| | | */ |
| | | public int hash16_plus(final byte[] data, int length) { |
| | | int hash = hash32(data, length, 0x9747b28c); |
| | | hash = hash % 65535 ; |
| | | if(hash < 0){ |
| | | hash = - hash ; |
| | | } |
| | | return hash ; |
| | | } |
| | | /** |
| | | * 刘润玉增加方法于2016-12-02 |
| | | * @param data |
| | | * @return |
| | | */ |
| | | public int hash16_plus(final String data) { |
| | | byte[] bytes = toBytesWithoutEncoding(data); |
| | | int hash = hash32(bytes, bytes.length, 0x9747b28c); |
| | | hash = hash % 65535 ; |
| | | if(hash < 0){ |
| | | hash = - hash ; |
| | | } |
| | | return hash ; |
| | | } |
| | | |
| | | /** |
| | | * Generates 32 bit hash from byte array with default seed value. |
| | | * @param data byte array to hash |
| | | * @param length length of the array to hash |
| | | * @return 32 bit hash of the given array |
| | | */ |
| | | public int hash32(final byte[] data, int length) { |
| | | return hash32(data, length, 0x9747b28c); |
| | | } |
| | | |
| | | public int hash32(final String data) { |
| | | byte[] bytes = toBytesWithoutEncoding(data); |
| | | return hash32(bytes, bytes.length, 0x9747b28c); |
| | | } |
| | | |
| | | /** |
| | | * Generates 64 bit hash from byte array with default seed value. |
| | | * @param data byte array to hash |
| | | * @param length length of the array to hash |
| | | * @return 64 bit hash of the given string |
| | | */ |
| | | public long hash64(final byte[] data, int length) { |
| | | return hash64(data, length, 0xe17a1465); |
| | | } |
| | | |
| | | |
| | | public long hash64(final String data) { |
| | | byte[] bytes = toBytesWithoutEncoding(data); |
| | | return hash64(bytes, bytes.length); |
| | | } |
| | | /** |
| | | * Generates 32 bit hash from byte array of the given length and seed. |
| | | * @param data byte array to hash |
| | | * @param length length of the array |
| | | * @param seed initial seed value |
| | | * @return 32 bit hash of the given array |
| | | */ |
| | | public int hash32(final byte[] data, int length, int seed) { |
| | | // 'm' and 'r' are mixing constants generated offline. |
| | | // They're not really 'magic', they just happen to work well. |
| | | final int m = 0x5bd1e995; |
| | | final int r = 24; |
| | | // Initialize the hash to a random value |
| | | int h = seed ^ length; |
| | | int length4 = length / 4; |
| | | for (int i = 0; i < length4; i++) { |
| | | final int i4 = i * 4; |
| | | int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) |
| | | + ((data[i4 + 2] & 0xff) << 16) |
| | | + ((data[i4 + 3] & 0xff) << 24); |
| | | k *= m; |
| | | k ^= k >>> r; |
| | | k *= m; |
| | | h *= m; |
| | | h ^= k; |
| | | } |
| | | // Handle the last few bytes of the input array |
| | | switch (length % 4) { |
| | | case 3: |
| | | h ^= (data[(length & ~3) + 2] & 0xff) << 16; |
| | | case 2: |
| | | h ^= (data[(length & ~3) + 1] & 0xff) << 8; |
| | | case 1: |
| | | h ^= (data[length & ~3] & 0xff); |
| | | h *= m; |
| | | } |
| | | h ^= h >>> 13; |
| | | h *= m; |
| | | h ^= h >>> 15; |
| | | return h; |
| | | } |
| | | /** |
| | | * Generates 64 bit hash from byte array of the given length and seed. |
| | | * @param data byte array to hash |
| | | * @param length length of the array to hash |
| | | * @param seed initial seed value |
| | | * @return 64 bit hash of the given array |
| | | */ |
| | | public long hash64(final byte[] data, int length, int seed) { |
| | | final long m = 0xc6a4a7935bd1e995L; |
| | | final int r = 47; |
| | | long h = (seed & 0xffffffffl) ^ (length * m); |
| | | int length8 = length / 8; |
| | | for (int i = 0; i < length8; i++) { |
| | | final int i8 = i * 8; |
| | | long k = ((long) data[i8 + 0] & 0xff) |
| | | + (((long) data[i8 + 1] & 0xff) << 8) |
| | | + (((long) data[i8 + 2] & 0xff) << 16) |
| | | + (((long) data[i8 + 3] & 0xff) << 24) |
| | | + (((long) data[i8 + 4] & 0xff) << 32) |
| | | + (((long) data[i8 + 5] & 0xff) << 40) |
| | | + (((long) data[i8 + 6] & 0xff) << 48) |
| | | + (((long) data[i8 + 7] & 0xff) << 56); |
| | | k *= m; |
| | | k ^= k >>> r; |
| | | k *= m; |
| | | h ^= k; |
| | | h *= m; |
| | | } |
| | | switch (length % 8) { |
| | | case 7: |
| | | h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48; |
| | | case 6: |
| | | h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40; |
| | | case 5: |
| | | h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32; |
| | | case 4: |
| | | h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24; |
| | | case 3: |
| | | h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16; |
| | | case 2: |
| | | h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8; |
| | | case 1: |
| | | h ^= (long) (data[length & ~7] & 0xff); |
| | | h *= m; |
| | | } |
| | | ; |
| | | h ^= h >>> r; |
| | | h *= m; |
| | | h ^= h >>> r; |
| | | return h; |
| | | } |
| | | public static void main(String[] args) { |
| | | test1() ; |
| | | test2() ; |
| | | } |
| | | |
| | | public static void test1() { |
| | | String regionNum = "110000" ; |
| | | String temp = "" ; |
| | | String rtuAddr = "" ; |
| | | |
| | | int total0 = 0 ; |
| | | int total1 = 0 ; |
| | | int total2 = 0 ; |
| | | int total3 = 0 ; |
| | | int total4 = 0 ; |
| | | int total5 = 0 ; |
| | | int totalx = 0 ; |
| | | int totaly = 0 ; |
| | | MurmurHash mhash = new MurmurHash() ; |
| | | Long start = System.currentTimeMillis() ; |
| | | for(int i = 0 ; i < 10000; i++){ |
| | | temp = "" + i ; |
| | | while(temp.length() < 5){ |
| | | temp = "0" + temp ; |
| | | } |
| | | rtuAddr = regionNum + temp ; |
| | | int hash = mhash.hash16_plus(rtuAddr) ; |
| | | if(hash < 0){ |
| | | total0++ ; |
| | | } |
| | | if(hash >= 0 && hash < 1000){ |
| | | total1++ ; |
| | | } |
| | | if(hash >= 1000 && hash < 2000){ |
| | | total2++ ; |
| | | } |
| | | if(hash >= 2000 && hash < 3000){ |
| | | total3++ ; |
| | | } |
| | | if(hash >= 3000 && hash < 4000){ |
| | | total4++ ; |
| | | } |
| | | if(hash >= 4000 && hash < 5000){ |
| | | total5++ ; |
| | | } |
| | | if(hash >= 64535 && hash < 65535){ |
| | | totalx++ ; |
| | | } |
| | | if(hash > 65535){ |
| | | totaly++ ; |
| | | } |
| | | //System.out.println(rtuAddr + "-" + crc); |
| | | } |
| | | Long end = System.currentTimeMillis() ; |
| | | System.out.println("用时" + ":" + (end - start)); |
| | | System.out.println("0以下" + ":" + total0); |
| | | System.out.println("0-1000" + ":" + total1); |
| | | System.out.println("1000-2000" + ":" + total2); |
| | | System.out.println("2000-3000" + ":" + total3); |
| | | System.out.println("3000-4000" + ":" + total4); |
| | | System.out.println("5000-6000" + ":" + total5); |
| | | System.out.println("64535-65535" + ":" + totalx); |
| | | System.out.println("65535以上" + ":" + totaly); |
| | | System.out.println("================="); |
| | | } |
| | | public static void test2() { |
| | | String regionNum = "110000" ; |
| | | String temp = "" ; |
| | | String rtuAddr = "" ; |
| | | |
| | | int total1 = 0 ; |
| | | int total2 = 0 ; |
| | | int total3 = 0 ; |
| | | int total4 = 0 ; |
| | | int total5 = 0 ; |
| | | MurmurHash mhash = new MurmurHash() ; |
| | | Long start = System.currentTimeMillis() ; |
| | | for(int i = 0 ; i < 10000; i++){ |
| | | temp = "" ; |
| | | temp = "" + i ; |
| | | while(temp.length() < 5){ |
| | | temp = "0" + temp ; |
| | | } |
| | | rtuAddr = regionNum + temp ; |
| | | int hash = mhash.hash32(rtuAddr) ; |
| | | if(hash > 0 && hash < 10000000){ |
| | | total1++ ; |
| | | } |
| | | if(hash >= 10000000 && hash < 20000000){ |
| | | total2++ ; |
| | | } |
| | | if(hash >= 20000000 && hash < 30000000){ |
| | | total3++ ; |
| | | } |
| | | if(hash >= 30000000 && hash < 40000000){ |
| | | total4++ ; |
| | | } |
| | | if(hash >= 40000000 && hash < 50000000){ |
| | | total5++ ; |
| | | } |
| | | //System.out.println(rtuAddr + "-" + crc); |
| | | } |
| | | Long end = System.currentTimeMillis() ; |
| | | System.out.println("用时" + ":" + (end - start)); |
| | | System.out.println("0-10000000" + ":" + total1); |
| | | System.out.println("10000000-20000000" + ":" + total2); |
| | | System.out.println("20000000-30000000" + ":" + total3); |
| | | System.out.println("30000000-40000000" + ":" + total4); |
| | | System.out.println("50000000-60000000" + ":" + total5); |
| | | System.out.println("================="); |
| | | } |
| | | } |