1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.bcel.util;
18
19 import java.util.ArrayList;
20 import java.util.HashMap;
21 import java.util.Iterator;
22 import java.util.List;
23 import java.util.Locale;
24 import java.util.Map;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27 import org.apache.bcel.Constants;
28 import org.apache.bcel.generic.ClassGenException;
29 import org.apache.bcel.generic.Instruction;
30 import org.apache.bcel.generic.InstructionHandle;
31 import org.apache.bcel.generic.InstructionList;
32
33 /***
34 * InstructionFinder is a tool to search for given instructions patterns, i.e.,
35 * match sequences of instructions in an instruction list via regular
36 * expressions. This can be used, e.g., in order to implement a peep hole
37 * optimizer that looks for code patterns and replaces them with faster
38 * equivalents.
39 *
40 * <p>
41 * This class internally uses the <a href="http://jakarta.apache.org/regexp/">
42 * Regexp</a> package to search for regular expressions.
43 *
44 * A typical application would look like this:
45 *
46 * <pre>
47 *
48 *
49 * InstructionFinder f = new InstructionFinder(il);
50 * String pat = "IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)";
51 *
52 * for(Iterator i = f.search(pat, constraint); i.hasNext(); ) {
53 * InstructionHandle[] match = (InstructionHandle[])i.next();
54 * ...
55 * il.delete(match[1], match[5]);
56 * ...
57 * }
58 *
59 *
60 * </pre>
61 *
62 * @version $Id: InstructionFinder.java 386056 2006-03-15 11:31:56Z tcurdt $
63 * @author <A HREF="mailto:m.dahm@gmx.de">M. Dahm</A>
64 * @see Instruction
65 * @see InstructionList
66 */
67 public class InstructionFinder {
68
69 private static final int OFFSET = 32767;
70
71
72 private static final int NO_OPCODES = 256;
73
74 private static final Map map = new HashMap();
75 private InstructionList il;
76 private String il_string;
77
78 private InstructionHandle[] handles;
79
80
81
82 /***
83 * @param il
84 * instruction list to search for given patterns
85 */
86 public InstructionFinder(InstructionList il) {
87 this.il = il;
88 reread();
89 }
90
91
92 /***
93 * Reread the instruction list, e.g., after you've altered the list upon a
94 * match.
95 */
96 public final void reread() {
97 int size = il.getLength();
98 char[] buf = new char[size];
99
100 handles = il.getInstructionHandles();
101
102 for (int i = 0; i < size; i++) {
103 buf[i] = makeChar(handles[i].getInstruction().getOpcode());
104 }
105 il_string = new String(buf);
106 }
107
108
109 /***
110 * Map symbolic instruction names like "getfield" to a single character.
111 *
112 * @param pattern
113 * instruction pattern in lower case
114 * @return encoded string for a pattern such as "BranchInstruction".
115 */
116 private static final String mapName( String pattern ) {
117 String result = (String) map.get(pattern);
118 if (result != null) {
119 return result;
120 }
121 for (short i = 0; i < NO_OPCODES; i++) {
122 if (pattern.equals(Constants.OPCODE_NAMES[i])) {
123 return "" + makeChar(i);
124 }
125 }
126 throw new RuntimeException("Instruction unknown: " + pattern);
127 }
128
129
130 /***
131 * Replace symbolic names of instructions with the appropiate character and
132 * remove all white space from string. Meta characters such as +, * are
133 * ignored.
134 *
135 * @param pattern
136 * The pattern to compile
137 * @return translated regular expression string
138 */
139 private static final String compilePattern( String pattern ) {
140
141 String lower = pattern.toLowerCase(Locale.ENGLISH);
142 StringBuffer buf = new StringBuffer();
143 int size = pattern.length();
144 for (int i = 0; i < size; i++) {
145 char ch = lower.charAt(i);
146 if (Character.isLetterOrDigit(ch)) {
147 StringBuffer name = new StringBuffer();
148 while ((Character.isLetterOrDigit(ch) || ch == '_') && i < size) {
149 name.append(ch);
150 if (++i < size) {
151 ch = lower.charAt(i);
152 } else {
153 break;
154 }
155 }
156 i--;
157 buf.append(mapName(name.toString()));
158 } else if (!Character.isWhitespace(ch)) {
159 buf.append(ch);
160 }
161 }
162 return buf.toString();
163 }
164
165
166 /***
167 * @return the matched piece of code as an array of instruction (handles)
168 */
169 private InstructionHandle[] getMatch( int matched_from, int match_length ) {
170 InstructionHandle[] match = new InstructionHandle[match_length];
171 System.arraycopy(handles, matched_from, match, 0, match_length);
172 return match;
173 }
174
175
176 /***
177 * Search for the given pattern in the instruction list. You can search for
178 * any valid opcode via its symbolic name, e.g. "istore". You can also use a
179 * super class or an interface name to match a whole set of instructions, e.g.
180 * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all
181 * "istore_x" instructions. Additional aliases are "if" for "ifxx", "if_icmp"
182 * for "if_icmpxx", "if_acmp" for "if_acmpxx".
183 *
184 * Consecutive instruction names must be separated by white space which will
185 * be removed during the compilation of the pattern.
186 *
187 * For the rest the usual pattern matching rules for regular expressions
188 * apply.
189 * <P>
190 * Example pattern:
191 *
192 * <pre>
193 * search("BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*");
194 * </pre>
195 *
196 * <p>
197 * If you alter the instruction list upon a match such that other matching
198 * areas are affected, you should call reread() to update the finder and call
199 * search() again, because the matches are cached.
200 *
201 * @param pattern
202 * the instruction pattern to search for, where case is ignored
203 * @param from
204 * where to start the search in the instruction list
205 * @param constraint
206 * optional CodeConstraint to check the found code pattern for
207 * user-defined constraints
208 * @return iterator of matches where e.nextElement() returns an array of
209 * instruction handles describing the matched area
210 */
211 public final Iterator search( String pattern, InstructionHandle from, CodeConstraint constraint ) {
212 String search = compilePattern(pattern);
213 int start = -1;
214 for (int i = 0; i < handles.length; i++) {
215 if (handles[i] == from) {
216 start = i;
217 break;
218 }
219 }
220 if (start == -1) {
221 throw new ClassGenException("Instruction handle " + from
222 + " not found in instruction list.");
223 }
224 Pattern regex = Pattern.compile(search);
225 List matches = new ArrayList();
226 Matcher matcher = regex.matcher(il_string);
227 while (start < il_string.length() && matcher.find(start)) {
228 int startExpr = matcher.start();
229 int endExpr = matcher.end();
230 int lenExpr = (endExpr - startExpr) + 1;
231 InstructionHandle[] match = getMatch(startExpr, lenExpr);
232 if ((constraint == null) || constraint.checkCode(match)) {
233 matches.add(match);
234 }
235 start = endExpr;
236 }
237 return matches.iterator();
238 }
239
240
241 /***
242 * Start search beginning from the start of the given instruction list.
243 *
244 * @param pattern
245 * the instruction pattern to search for, where case is ignored
246 * @return iterator of matches where e.nextElement() returns an array of
247 * instruction handles describing the matched area
248 */
249 public final Iterator search( String pattern ) {
250 return search(pattern, il.getStart(), null);
251 }
252
253
254 /***
255 * Start search beginning from `from'.
256 *
257 * @param pattern
258 * the instruction pattern to search for, where case is ignored
259 * @param from
260 * where to start the search in the instruction list
261 * @return iterator of matches where e.nextElement() returns an array of
262 * instruction handles describing the matched area
263 */
264 public final Iterator search( String pattern, InstructionHandle from ) {
265 return search(pattern, from, null);
266 }
267
268
269 /***
270 * Start search beginning from the start of the given instruction list. Check
271 * found matches with the constraint object.
272 *
273 * @param pattern
274 * the instruction pattern to search for, case is ignored
275 * @param constraint
276 * constraints to be checked on matching code
277 * @return instruction handle or `null' if the match failed
278 */
279 public final Iterator search( String pattern, CodeConstraint constraint ) {
280 return search(pattern, il.getStart(), constraint);
281 }
282
283
284 /***
285 * Convert opcode number to char.
286 */
287 private static final char makeChar( short opcode ) {
288 return (char) (opcode + OFFSET);
289 }
290
291
292 /***
293 * @return the inquired instruction list
294 */
295 public final InstructionList getInstructionList() {
296 return il;
297 }
298
299 /***
300 * Code patterns found may be checked using an additional user-defined
301 * constraint object whether they really match the needed criterion. I.e.,
302 * check constraints that can not expressed with regular expressions.
303 *
304 */
305 public static interface CodeConstraint {
306
307 /***
308 * @param match
309 * array of instructions matching the requested pattern
310 * @return true if the matched area is really useful
311 */
312 public boolean checkCode( InstructionHandle[] match );
313 }
314
315
316 static {
317 map
318 .put(
319 "arithmeticinstruction",
320 "(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)");
321 map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial)");
322 map
323 .put(
324 "arrayinstruction",
325 "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)");
326 map.put("gotoinstruction", "(goto|goto_w)");
327 map.put("conversioninstruction",
328 "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)");
329 map.put("localvariableinstruction",
330 "(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)");
331 map.put("loadinstruction", "(fload|dload|lload|iload|aload)");
332 map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)");
333 map
334 .put(
335 "cpinstruction",
336 "(ldc2_w|invokeinterface|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)");
337 map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)");
338 map
339 .put(
340 "branchinstruction",
341 "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
342 map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)");
343 map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)");
344 map.put("select", "(tableswitch|lookupswitch)");
345 map
346 .put(
347 "ifinstruction",
348 "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)");
349 map.put("jsrinstruction", "(jsr|jsr_w)");
350 map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)");
351 map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)");
352 map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)");
353 map
354 .put(
355 "typedinstruction",
356 "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)");
357 map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)");
358 map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)");
359 map
360 .put(
361 "indexedinstruction",
362 "(lload|lstore|fload|ldc2_w|invokeinterface|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)");
363 map
364 .put(
365 "pushinstruction",
366 "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)");
367 map
368 .put(
369 "stackproducer",
370 "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)");
371 map
372 .put(
373 "stackconsumer",
374 "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)");
375 map
376 .put(
377 "exceptionthrower",
378 "(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|ldc|invokestatic|daload)");
379 map
380 .put(
381 "loadclass",
382 "(multianewarray|invokeinterface|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)");
383 map
384 .put(
385 "instructiontargeter",
386 "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
387
388 map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)");
389 map.put("if_acmp", "(if_acmpeq|if_acmpne)");
390 map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)");
391
392 map.put("iconst", precompile(Constants.ICONST_0, Constants.ICONST_5, Constants.ICONST_M1));
393 map.put("lconst", new String(new char[] {
394 '(', makeChar(Constants.LCONST_0), '|', makeChar(Constants.LCONST_1), ')'
395 }));
396 map.put("dconst", new String(new char[] {
397 '(', makeChar(Constants.DCONST_0), '|', makeChar(Constants.DCONST_1), ')'
398 }));
399 map.put("fconst", new String(new char[] {
400 '(', makeChar(Constants.FCONST_0), '|', makeChar(Constants.FCONST_1), ')'
401 }));
402 map.put("iload", precompile(Constants.ILOAD_0, Constants.ILOAD_3, Constants.ILOAD));
403 map.put("dload", precompile(Constants.DLOAD_0, Constants.DLOAD_3, Constants.DLOAD));
404 map.put("fload", precompile(Constants.FLOAD_0, Constants.FLOAD_3, Constants.FLOAD));
405 map.put("aload", precompile(Constants.ALOAD_0, Constants.ALOAD_3, Constants.ALOAD));
406 map.put("istore", precompile(Constants.ISTORE_0, Constants.ISTORE_3, Constants.ISTORE));
407 map.put("dstore", precompile(Constants.DSTORE_0, Constants.DSTORE_3, Constants.DSTORE));
408 map.put("fstore", precompile(Constants.FSTORE_0, Constants.FSTORE_3, Constants.FSTORE));
409 map.put("astore", precompile(Constants.ASTORE_0, Constants.ASTORE_3, Constants.ASTORE));
410
411 for (Iterator i = map.keySet().iterator(); i.hasNext();) {
412 String key = (String) i.next();
413 String value = (String) map.get(key);
414 char ch = value.charAt(1);
415 if (ch < OFFSET) {
416 map.put(key, compilePattern(value));
417 }
418 }
419
420 StringBuffer buf = new StringBuffer("(");
421 for (short i = 0; i < NO_OPCODES; i++) {
422 if (Constants.NO_OF_OPERANDS[i] != Constants.UNDEFINED) {
423
424
425 buf.append(makeChar(i));
426 if (i < NO_OPCODES - 1) {
427 buf.append('|');
428 }
429 }
430 }
431 buf.append(')');
432 map.put("instruction", buf.toString());
433 }
434
435
436 private static String precompile( short from, short to, short extra ) {
437 StringBuffer buf = new StringBuffer("(");
438 for (short i = from; i <= to; i++) {
439 buf.append(makeChar(i));
440 buf.append('|');
441 }
442 buf.append(makeChar(extra));
443 buf.append(")");
444 return buf.toString();
445 }
446
447
448
449
450
451 private static final String pattern2string( String pattern ) {
452 return pattern2string(pattern, true);
453 }
454
455
456 private static final String pattern2string( String pattern, boolean make_string ) {
457 StringBuffer buf = new StringBuffer();
458 for (int i = 0; i < pattern.length(); i++) {
459 char ch = pattern.charAt(i);
460 if (ch >= OFFSET) {
461 if (make_string) {
462 buf.append(Constants.OPCODE_NAMES[ch - OFFSET]);
463 } else {
464 buf.append((ch - OFFSET));
465 }
466 } else {
467 buf.append(ch);
468 }
469 }
470 return buf.toString();
471 }
472 }