1月28日
My current project needed a way to convert between 128 bit numbers (e.g. java.util.UUID) and strings. A special requirement is that the strings be legal Java identifiers, because we use them for variable names in generated code. For the same reason, I wanted to ensure that the strings were of minimal size.
I was able to come up with a trivial algorithm in a few minutes that works OK, but I felt that I should be able to come up with the most optimal solution given a little more time. It turns out that the optimal solution consumed most of this weekend, but I finally got it working.
There's no way I can bill my customer for this exercise, so I thought I'd post it here in case anyone else finds it interesting. I'm also curious to see if anyone can come up with a better or faster solution.
Basically my approach is to treat the 128 bit number as up to 21 groups of 6 bits each. This gives me 64 possible characters which works out perfectly, because Java has exactly 64 legal characters for identifiers (0-9, A-Z, a-z, _, and $). I also prepend an additional underscore to avoid illegal starting characters and clashes with keywords and literals.
For now, here's the Java source to ponder. Tomorrow I'll discuss the code in more detail, and post a VB version which I actually wrote first, then ported to Java.
001
002 public final class Utils {
003
004 private static final byte bUnder = 95;
005 private static final byte bDollar = 36;
006 private static final byte bQuest = 63;
007 private static final byte bFirstUAlpha = 65;
008 private static final byte bFirstLAlpha = 97;
009 private static final byte bFirstNum = 48;
010 private static final long mask6Bits = 63;
011 private static final long mask4Bits = 15;
012 private static final long mask2Bits = 3;
013 private static final int bitsPerChar = 6;
014
015 private static final char[] charMap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$"
016 .toCharArray();
017
018 public final static class Int128 {
019 public final long msb;
020 public final long lsb;
021 public Int128(long m, long l) {
022 msb = m;
023 lsb = l;
024 }
025 public Int128() {
026 this(0, 0);
027 }
028 @Override
029 public final String toString() {
030 return "" + msb + "," + lsb;
031 }
032 }
033
034 public static String int128ToStr(long msb, long lsb) {
035 final int MAX_BYTE = 31;
036 char[] buf = new char[32];
037 int i = MAX_BYTE;
038
039 // Process the least significant bits
040 // 64 bit number has 10 six bit encoded values plus four bits left over
041 for (int n = 0; n < 10; ++n) {
042 if (lsb == 0 && msb == 0) {
043 break; // Eliminate leading zeros
044 }
045 byte b = (byte) (lsb & mask6Bits);
046 buf[i--] = charMap[b];
047 lsb >>>= bitsPerChar;
048 }
049
050 // 4 bits from the lsb, and 2 from the msb
051 if (lsb != 0 || msb != 0 || i == MAX_BYTE) {
052 long leftOver = lsb & 15;
053 long firstTwo = msb & 3;
054 byte b = (byte) ((firstTwo << 4) | leftOver);
055 buf[i--] = charMap[b];
056 msb >>>= 2;
057 }
058
059 // Process the most significant bits
060 while (msb != 0) {
061 byte b = (byte) (msb & mask6Bits);
062 buf[i--] = charMap[b];
063 msb >>>= bitsPerChar;
064 }
065
066 // It's easiest to simply prefix an underscore to avoid
067 // illegal identifiers and clashes with keywords and literals.
068 buf[i--] = '_';
069
070 return new String(buf, i + 1, MAX_BYTE - i);
071 }
072
073 public static Int128 strToInt128(String s) {
074 byte[] buf = s.getBytes();
075 int maxByte = buf.length - 1;
076 int i = maxByte;
077 int minByte = 0;
078 if (buf[0] == bUnder) {
079 minByte = 1;
080 }
081 long msb = 0;
082 long lsb = 0;
083
084 for (int n = 0; n < 10; ++n) {
085 if (i < minByte) {
086 return new Int128(msb, lsb);
087 }
088 long b = charToMod64(buf[i]);
089 if (b != 0) {
090 if (n != 0) {
091 b <<= (n * 6);
092 }
093 lsb |= b;
094 }
095 --i;
096 }
097
098 if (i >= minByte) {
099 long b = charToMod64(buf[i]);
100 if (b != 0) {
101 long leftOver = b & mask4Bits;
102 msb = (b >>> 4) & mask2Bits;
103 leftOver <<= 60;
104 lsb |= leftOver;
105 }
106 --i;
107 }
108
109 for (int n = 0; n < 11; ++n) {
110 if (i < minByte) {
111 return new Int128(msb, lsb);
112 }
113 long b = charToMod64(buf[i]);
114 if (b != 0) {
115 b <<= (n * bitsPerChar + 2);
116 msb |= b;
117 }
118 --i;
119 }
120 return new Int128(msb, lsb);
121 }
122
123 private static byte charToMod64(byte c) {
124 if (c >= bFirstNum && c <= bFirstNum + 10) {
125 return (byte) (c - 48);
126 } else if (c >= bFirstUAlpha && c <= bFirstUAlpha + 26) {
127 return (byte) (c - bFirstUAlpha + 10);
128 } else {
129 if (c >= bFirstLAlpha && c <= bFirstLAlpha + 26) {
130 return (byte) (c - bFirstLAlpha + 26 + 10);
131 } else if (c == bUnder) {
132 return 62;
133 } else if (c == bDollar) {
134 return 63;
135 } else {
136 assert false;
137 return 0;
138 }
139 }
140 }
141 }
|
|
Java2html
|