REPAIRING PROGRAMS WITH SEMANTIC CODE SEARCH Yalin Ke (Iowa State), Kathryn T. Stolee (Iowa State), Claire Le Goues (Carnegie Mellon), Yuriy Brun (UMass Amherst) 1
Input: buggy program, tests. SearchRepair: potential patches; uses semantic code search. Output: fixed program. 2
3 CC BY-NC: Fir0002/Flagstaffotos
PROBLEM PATCH QUALITY 4
Input: buggy program, tests. Automatic repair magic: potential patches. Output: fixed program. Performance on withheld tests? 5
OVERFITTING Does the patch generalize beyond the test cases used to create it? Edward K. Smith, Earl Barr, Claire Le Goues, and Yuriy Brun, Is the Cure Worse than the Disease? Overfitting in Automated Program Repair, ESEC/FSE 2015. 6
Input: buggy program, tests. SearchRepair: potential patches. Output: fixed program. Performance on withheld tests! 7
COMPUTE THE MEDIAN OF THREE NUMBERS 8
int median(int a, int b, int c) { int result; if ((b<=a && a<=c) || (c<=a && a<=b)) result = a; if ((a<b && b <= c) || (c<=b && b<a)) result = b; if ((a<c && c<b) || (b<c && c<a)) result = c; return result; } 9
int median(int a, int b, int c) { int result = 0; if ((b<=a && a<=c) || (c<=a && a<=b)) result = a; if ((a<b && b <= c) || (c<=b && b<a)) result = b; if ((a<c && c<b) || (b<c && c<a)) result = c; return result; } 10
int median(int a, int b, int c) { int result = 0; if ((b<=a && a<=c) || (c<=a && a<=b)) result = a; if ((a<b && b <= c) || (c<=b && b<a)) result = b; if ((a<c && c<b) || (b<c && c<a)) result = c; return result; } 11
int median(int a, int b, int c) { int result = 0; if ((b<=a && a<=c) || (c<=a && a<=b)) result = a; if ((a<b && b <= c) || (c<=b && b<a)) result = b; if ((a<c && c<b) || (b<c && c<a)) result = c; return result; } 12
int median(int a, int b, int c) { int result = 0; if ((b<=a && a<=c) || (c<=a && a<=b)) result = a; if ((a<b && b <= c) || (c<=b && b<a)) result = b; if ((a<c && c<b) || (b<c && c<a)) result = c; return result; } 13
int median(int a, int b, int c) { int result = 0; if ( (b<=a && a<=c) || (c<=a && a<=b)) result = a; if ((a<b && b <= c) || (c<=b && b<a)) result = b; if ((a<c && c<b) || (b<c && c<a)) result = c; return result; } 14
int median(int a, int b, int c) { int result = 0; if ( (b<=a && a<=c) || (c<=a && a<=b) ) result = a; if ((a<b && b <= c) || (c<=b && b<a)) result = b; if ((a<c && c<b) || (b<c && c<a)) result = c; return result; } 15
int median(int a, int b, int c) { int result = 0; if ( (b<=a && a<=c) || (c<=a && a<=b) ) result = a; if ( (a<b && b <= c) || (c<=b && b<a) ) result = b; if ( (a<c && c<b) || (b<c && c<a) ) result = c; return result; } 16
int med_broken(int a, int b, int c) { int result; if ((a==b) || (a==c) || (b<a && a<c) || (c<a && a<b)) result = a; else if ((b==c) || (a<b && b<c) || (c<b && b<a)) result = b; else if (a<c && c<b) result = c; return result; } 17
int med_broken(int a, int b, int c) { int result; if ((a==b) || (a==c) || (b<a && a<c) || (c<a && a<b)) result = a; else if ((b==c) || (a<b && b<c) || (c<b && b<a)) result = b; else if (a<c && c<b) result = c; return result; } 18
int med_broken(int a, int b, int c) { int result; if ((a==b) || (a==c) || (b<a && a<c) || (c<a && a<b)) result = a; else if ((b==c) || (a<b && b<c) || (c<b && b<a)) result = b; else if (a<c && c<b) result = c; return result; } 19
int med_broken(int a, int b, int c) { int result; if ((a==b) || (a==c) || (b<a && a<c) || (c<a && a<b)) result = a; else if ((b==c) || (a<b && b<c) || (c<b && b<a)) result = b; else if (a<c && c<b) result = c; return result; } Input Expected Pass? 0,0,0 0 ✓ 2,0,1 1 X 0,0,1 0 ✓ 0,1,0 0 ✓ 0,2,1 1 ✓ 0,2,3 2 ✓ 20
int med_broken(int a, int b, int c) { int result; Input Expected Pass? if ((a==b) || (a==c) || 0,0,0 0 ✓ (b<a && a<c) || 2,0,1 1 X (c<a && a<b)) 0,0,1 0 ✓ result = a; 0,1,0 0 ✓ if (b < a) 0,2,1 1 ✓ result = c; 0,2,3 2 ✓ else if (b<a) (b==c) || (a<b && b<c) || (c<b && b<a)) result = b; else if (a<c && c<b) result = c; return result; } 21
int med_broken(int a, int b, int c) { int result; Input Expected Pass? if ((a==b) || (a==c) || 0,0,0 0 ✓ (b<a && a<c) || 2,0,1 1 X (c<a && a<b)) 0,0,1 0 ✓ result = a; 0,1,0 0 ✓ if (b < a) 0,2,1 1 ✓ result = c; 0,2,3 2 ✓ if (b<a) (b==c) || (a<b && b<c) || (c<b && b<a)) result = b; if (a<c && c<b) result = c; return result; } 22
int med_broken(int a, int b, int c) { int result; Input Expected Pass? if ((a==b) || (a==c) || 0,0,0 0 ✓ (b<a && a<c) || 2,0,1 1 ✓ (c<a && a<b)) 0,0,1 0 ✓ result = a; 0,1,0 0 ✓ if (b < a) 0,2,1 1 ✓ result = c; 0,2,3 2 ✓ else if (b<a) (b==c) || (a<b && b<c) || (c<b && b<a)) result = b; else if (a<c && c<b) result = c; return result; } 23
int med_broken(int a, int b, int c) { int result; if ((a==b) || (a==c) || (b<a && a<c) || (c<a && a<b)) result = a; if ((b==c) || (a<b && b<c) || (c<b && b<a)) result = b; if (a<c && c<b) result = c; return result; } Input Expected Pass? 2,6,8 6 ✓ 2,8,6 6 ✓ 6,2,8 6 ✓ 6,8,2 6 ✓ 8,2,6 6 X 8,6,2 6 ✓ 9,9,9 9 ✓ 24
int med_broken(int a, int b, int c) { int result; Input Expected Pass? if ((a==b) || (a==c) || 0,0,0 0 ✓ (b<a && a<c) || 2,0,1 1 ✓ (c<a && a<b)) 0,0,1 0 ✓ result = a; 0,1,0 0 ✓ if (b < a) 0,2,1 1 ✓ result = c; 0,2,3 2 ✓ else if (b<a) (b==c) || (a<b && b<c) || (c<b && b<a)) Input Expected Pass? result = b; 2,6,8 6 ✓ else if (a<c && c<b) 2,8,6 6 ✓ 6,2,8 6 X result = c; 6,8,2 6 ✓ return result; 8,2,6 6 ✓ } 8,6,2 6 X 9,9,9 9 ✓ 25
26
WHAT IF… Instead of trying to make small changes, we replaced buggy regions with code that correctly captures the overall desired logic? Principle: using human-written code to fix code at a higher granularity level leads to better quality repairs. 27
SearchRepair: THE PLAN 1. Localize bug to a region. 2. Create input/output examples that show what the code should do. 3. Use semantic code search to find snippets that do the right thing. 4. Construct and test candidate patches for each result from the search. 28
encoding Snippet DB Results patch construction Profile/ Queries fault localization + analysis 29
MODIFIED SB-FAULT LOCALIZATION int med_broken(int a, int b, int c) { int result; if ((a==b) || (a==c) || (b<a && a<c) || (c<a && a<b)) result = a; else if ((b==c) || (a<b && b<c) || (c<b && b<a)) result = b; else if (a<c && c<b) result = c; return result; } Input Expected Pass? 6,2,8 6 ✓ 6,8,2 6 ✓ 8,2,6 6 X 8,6,2 6 ✓ James A. Jones, Mary Jean Harrold, and John Stasko. Visualization of test information to assist fault localization. ICSE 2002. M. Gabel and Z. Su. A study of the uniqueness of source code. FSE, 2010. 30
encoding Snippet DB Results patch construction Profile/ Queries fault localization + analysis 31
SEARCHREPAIR: HIGH-QUALITY AUTOMATED BUG REPAIR USING SEMANTIC SEARCH 32
SEMANTIC CODE SEARCH Keyword: "C median three numbers" Semantic: Input Expected 2,6,8 6 2,8,6 6 6,2,8 6 6,8,2 6 8,6,2 6 9,9,9 9 K. T. Stolee, S. Elbaum, M. B. Dwyer, "Code search with input/output queries: Generalizing, ranking, and assessment", JSS 2015. K. T. Stolee, S. Elbaum, and D. Dobos. 2014. "Solving the Search for Source Code". TOSEM 2014. Steven P. Reiss. Semantics-based code search. ICSE, 2009. 33
Query (2,6,8 → 6) → Search Engine (over Code Repository) → Results 34
Query → Search Engine [Code Repository → Indexing → Matching → Ranking] → Results 35
SEMANTIC CODE SEARCH 1. Store candidate snippets as symbolic constraints. 2. Search using input/output examples that show what the desired code should do. 3. See which symbolic constraints are co-satisfiable with the input/output examples constraints (Z3). 36
Recommend
More recommend