00001 import java.io.*; 00002 00069 public class CodepositionReplacement 00070 extends InputReplacement implements ReplacementFactory { 00071 00072 public final static String REPLACEMENT_CHARACTER = "\uFFFD"; 00073 String prefix = null; 00074 String suffix = null; 00075 String placeHolder = "XXXX"; 00076 String replinfo = null; 00077 boolean ignorecase = false; 00078 int radix = 16; 00079 String charset = null; 00080 int maxCodepos = 0xFFFF; 00081 private String cache = null; 00082 private int pfxLength; 00083 private int codepos; 00084 private String continuation1; 00085 private String continuation2; 00086 public synchronized String getReplacement(String input) { 00087 parseInput(input); 00088 if (charset==null) 00089 return new Character((char)codepos).toString(); 00090 else 00091 try { 00092 String encoded = new String(new byte[] { (byte)codepos }, charset); 00093 if (encoded.length()==0) // Encoding does not seem single byte 00094 throw new UnsupportedOperationException 00095 ("Charset "+charset+" unsupported, probably not single byte"); 00096 return encoded; 00097 } catch (UnsupportedEncodingException e) { 00098 return REPLACEMENT_CHARACTER; 00099 } 00100 } 00101 public synchronized String replacesPrefixOf(String input) { 00102 parseInput(input); 00103 if (pfxLength==0) return null; 00104 return input.substring(0,pfxLength); 00105 } 00106 private void parseInput(String input) { 00107 if (ignorecase) input = input.toUpperCase(); 00108 if (cache==input || input.equals(cache)) return; 00109 cache = input; 00110 pfxLength = 0; codepos = 0; 00111 continuation1 = null; 00112 continuation2 = null; 00113 if (prefix!=null && !input.startsWith(prefix)) { 00114 if (prefix.startsWith(input)) 00115 continuation1 = getInputInfo(); 00116 return; 00117 } 00118 int pos = 0; 00119 int len = input.length(); 00120 if (prefix!=null) pos = prefix.length(); 00121 boolean oneDigit = false; 00122 while (pos < len) { 00123 int digit = Character.digit(input.charAt(pos),radix); 00124 
00125 // If the suffix follows: 00126 // Prefix match if there was a digit 00127 // No match possible otherwise 00128 if (suffix!=null && input.startsWith(suffix,pos)) { 00129 if (oneDigit) 00130 pfxLength = pos+suffix.length(); 00131 return; } 00132 00133 // If the new character is no digit or would exceede maxCodepos: 00134 // Prefix match if there was a digit and there is no suffix. 00135 // No match possible otherwise 00136 if (digit==-1 || codepos*radix+digit > maxCodepos) { 00137 if (oneDigit && suffix==null) 00138 pfxLength = pos; 00139 return; } 00140 00141 // We have found a digit 00142 oneDigit = true; 00143 codepos = codepos*radix+digit; 00144 00145 pos++; 00146 } 00147 00148 // End of string: 00149 // If there is a suffix, no match, continuations see below 00150 // If there is no suffix, and there is a digit, match 00151 // If there is no suffix, and radix*codepos<=maxCodepos, continuation is possible 00152 if (suffix!=null) { 00153 if (oneDigit) continuation1 = input+suffix; 00154 if (radix*codepos<=maxCodepos) 00155 continuation2 = input+placeHolder+suffix; 00156 if (continuation1==null && continuation2!=null) { 00157 continuation1=continuation2; 00158 continuation2=null; } 00159 } else { 00160 if (oneDigit) pfxLength = len; 00161 if (radix*codepos<=maxCodepos) 00162 continuation1 = input+placeHolder; 00163 } 00164 } 00165 public synchronized String[] isContinuationOf(String input) { 00166 parseInput(input); 00167 if (continuation1==null) return null; 00168 if (continuation2==null) 00169 return new String[] { continuation1 }; 00170 return new String[] { continuation1, continuation2 }; 00171 } 00172 public String getInputInfo() { 00173 return (prefix!=null?prefix:"")+placeHolder+ 00174 (suffix!=null?suffix:""); } 00175 public String getReplacementInfo() { 00176 return replinfo; }; 00177 00178 public CodepositionReplacement(BufferedReader input) 00179 throws IOException, FileFormatException { 00180 while (true) { 00181 String line=input.readLine(); 
00182 if (line==null) { 00183 throw new FileFormatException 00184 ("EOF in CodepositionReplacement"); 00185 } else if (line.equals("###")) { 00186 break; 00187 } else if (line.equals("")) { 00188 } else if (line.equals(":PREFIX")) { 00189 prefix = input.readLine(); 00190 if (prefix==null) 00191 throw new FileFormatException 00192 ("in CodepositionReplacement: :PREFIX not followed by a line"); 00193 } else if (line.equals(":SUFFIX")) { 00194 suffix = input.readLine(); 00195 if (suffix==null) 00196 throw new FileFormatException 00197 ("in CodepositionReplacement: :SUFFIX not followed by a line"); 00198 } else if (line.equals(":RADIX")) { 00199 String str = input.readLine(); 00200 if (str==null) 00201 throw new FileFormatException 00202 ("in CodepositionReplacement: :RADIX not followed by a line"); 00203 try { 00204 radix = Integer.parseInt(str); 00205 } catch (NumberFormatException e) { 00206 throw new FileFormatException 00207 ("in CodepositionReplacement: :RADIX not followed by an integer"); 00208 } 00209 } else if (line.equals(":CHARSET")) { 00210 charset = input.readLine(); 00211 if (charset==null) 00212 throw new FileFormatException 00213 ("in CodepositionReplacement: :CHARSET not followed by a line"); 00214 if (charset.toLowerCase().equals("unicode")) { 00215 charset = null; 00216 } else { 00217 try { 00218 new String(new byte[0], charset); 00219 } catch (UnsupportedEncodingException e) { 00220 throw new FileFormatException 00221 ("in CodepositionReplacement: Unsupported charset "+charset+": "+e); 00222 } 00223 maxCodepos = 0xFF; 00224 } 00225 } else if (line.equals(":PLACEHOLDER")) { 00226 placeHolder = input.readLine(); 00227 if (placeHolder==null) 00228 throw new FileFormatException 00229 ("in CodepositionReplacement: :PLACEHOLDER not followed by a line"); 00230 } else if (line.equals(":INFO")) { 00231 replinfo = input.readLine(); 00232 if (replinfo==null) 00233 throw new FileFormatException 00234 ("in CodepositionReplacement: :INFO not followed by a line"); 
00235 00236 } else if (line.equals(":IGNORECASE")) { 00237 ignorecase = true; 00238 } else { 00239 throw new FileFormatException 00240 ("in CodepositionReplacement: Unknown command "+line); 00241 } 00242 } 00243 00244 if (replinfo==null) { 00245 if (charset==null) 00246 replinfo = "Unicode character "+placeHolder; 00247 else 00248 replinfo = charset.toUpperCase()+" character "+placeHolder; 00249 } 00250 00251 if (ignorecase) { 00252 if (prefix!=null) prefix = prefix.toUpperCase(); 00253 if (suffix!=null) suffix = suffix.toUpperCase(); 00254 } 00255 } 00256 }
// 1.3.7