Compilation of Incomplete C...

34
Compilation of Incomplete C Sources Fernando Magno Quintão Pereira [email protected]

Transcript of Compilation of Incomplete C...

Compilation of Incomplete C Sources

Fernando Magno Quintão Pereira [email protected]

Goal

•  The goal of this project is to reconstruct automatically the declarations of types and functions necessary to compile a snippet of C code.

static int vn(const u8 *x, const u8 *y, int n) { u32 i, d = 0; for (i = 0; i < n; ++i) d |= x[i] ^ y[i]; return (1 & ((d - 1) >> 8)) - 1; }

int crypto_verify_16(const u8 *x, const u8 *y) { return vn(x, y, 16); }

Available Source

Goal

•  The goal of this project is to reconstruct automatically the declarations of types and functions necessary to compile a snippet of C code.

static int vn(const u8 *x, const u8 *y, int n) { u32 i, d = 0; for (i = 0; i < n; ++i) d |= x[i] ^ y[i]; return (1 & ((d - 1) >> 8)) - 1; }

int crypto_verify_16(const u8 *x, const u8 *y) { return vn(x, y, 16); }

typedef int u8; typedef int u32;

Available Source Reconstructed Source

Example1

Graph GRAPHinit(int V) { Graph G = malloc(sizeof *G); G->V = V; G->E = 0; G->adj = MATRIXint(V, V, 0); return G; }

typedef struct Graph { int V; int E; uintptr_t adj; } *Graph;

uintptr_t MATRIXint(int, int, int);

Available Source

Reconstructed Source This code is from "Algorithms in C", Third Edition, by Robert Sedgewick, Addison-Wesley, 2002, available at https://www.cs.princeton.edu/~rs/Algs3.c5/code.txt

Example2

void GRAPHinsertE(Graph G, Edge e) { int v = e.v, w = e.w; if (G->adj[v][w] == 0) G->E++; G->adj[v][w] = 1; G->adj[w][v] = 1; }

typedef struct Graph { int **adj; uintptr_t E; } *Graph;

typedef struct Edge { int v; int w; } Edge;

Available Source

Reconstructed Source This code is from "Algorithms in C", Third Edition, by Robert Sedgewick, Addison-Wesley, 2002, available at https://www.cs.princeton.edu/~rs/Algs3.c5/code.txt

Contribution

•  Psyche-c: a tool that takes part of a C program, and reconstructs as much of it as is necessary to compile it.

•  Why? – Code completion – Debugging – Reduced compilation time

– Security

You can test it right now: http://cuda.dcc.ufmg.br/psyche-c/

The Big Picture

•  Some programs are difficult to compile •  But we still want to analyze them

Whole program

Target program

Compiler

Static Analysis Dynamic Analysis

psyche-c

Reconstructed program

What to do with Security?

•  Static analysis tools need to parse the source program •  Dynamic analysis tools need to compile the program

•  Analyzing large programs is tricky – Unavailable libraries – Unresolved dependencies

•  Psyche-c solves these problems!

Demo

•  In 2014 we released FlowTracker – Finds non-isochronous behavior in C programs

#define FOR(i,n) for (i = 0; i < n; ++i)

static int vn(const u8 *x, const u8 *y, int n) { u32 i, d = 0; FOR(i, n) d |= x[i] ^ y[i]; return (1 & ((d - 1) >> 8)) - 1; }

int crypto_verify_16(const u8 *x, const u8 *y) { return vn(x, y, 16); }

Code taken from TweetNaCl, a crypto library in 100 tweets, available at https://tweetnacl.cr.yp.to/

Is this code on the right isochronous?

Demo

•  In 2014 we released FlowTracker – Finds non-isochronous behavior in C programs

#define FOR(i,n) for (i = 0; i < n; ++i)

static int vn(const u8 *x, const u8 *y, int n) { u32 i, d = 0; FOR(i, n) d |= x[i] ^ y[i]; return (1 & ((d - 1) >> 8)) - 1; }

int crypto_verify_16(const u8 *x, const u8 *y) { return vn(x, y, 16); }

Code taken from TweetNaCl, a crypto library in 100 tweets, available at https://tweetnacl.cr.yp.to/

The code is isochronous. However, once we analyze its source file, tweetnacl.c, we get a different picture.

Demo Line 61 -> Line 57 -> Line 58 -> Line 63 -> Line 242 -> Line 261 -> Line 131 -> Line 134

61: crypto_verify_16(x, y)

58: return RET_VAL = (1 & ((d - 1) >> 8) - 1)

63: return RET_VAL

242: return RET_VAL = crypto_verify_16(h, x)

261: if (crypto_onetimeauth_verify(c + 16,c + 32,d - 32,x) != 0)

262: crypto_stream_xor(m,c,d,n,k)

vn(x, y, 16) 57: for (int i = 0; i < n; i++) d |= x[i]^y[i]

return - 1

1: the return value of crypto_onetimeauth_verify is based on the return value of crypto_verify_16, which reads a secret as first parameter.

2: The contents of x will influence the value of d

3: The value of 'd' will return back to the conditional in line 261, through a chain of three return operations.

4: This line belongs to the body of function crypto_onetimeauth_verify, which was called in line 261 of our target program.

5: Indeed, we started analyzing the program through this function, but it belongs to a wider context. And 'x' is a secret...

if authentication() fails print "fail!" else print "Ok!"

Even though crypto_verify_16 is constant time, I believe it is still possible for secret 'x' to determine the flow of execution of this program

The confusion arises when only the crypto_verify_16() function is specified in the XML, as I did not expect that calling code would also be examined

In the end, do you think it has a side channel?

There is no side channel, because function crypto_secretbox_open(), which contains the affecting code, can run in variable time, given that crypto_onetimeauth_verify() runs in constant time. What crypto_secretbox_open() does is refuse to decrypt the ciphertext if the authentication verification fails. Whether the authentication is valid or not is considered public information. The issue is: how to capture such nuances in FlowTracker?

The Essence of the Problem

•  FlowTracker is inter-procedural –  It analyzes the whole program

•  But we want to analyze only a subset of the program •  We extract the function of interest, and reconstruct the context necessary to compile it using psyche-c

static int vn(const u8 *x, const u8 *y, int n) { u32 i, d = 0; for (i = 0; i < n; ++i) d |= x[i] ^ y[i]; return (1 & ((d - 1) >> 8)) - 1; }

int crypto_verify_16(const u8 *x, const u8 *y) { return vn(x, y, 16); }

<functions><sources><function><name>crypto_verify_16</name><parameter>2</parameter></function></sources></functions>

This is the code we want to analyze The XML query descriptor

Example: FlowTracker with Incomplete Sources

static int vn(const u8 *x, const u8 *y, int n) { u32 i, d = 0; for (i = 0; i < n; ++i) d |= x[i] ^ y[i]; return (1 & ((d - 1) >> 8)) - 1; }

int crypto_verify_16(const u8 *x, const u8 *y) { return vn(x, y, 16); }

FlowTracker's output

Original program

Example: FlowTracker with Reconstructed Sources

#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
typedef int bool; const bool false = 0; const bool true = 1; typedef int u8; typedef int u32;

static int vn(const u8 *x, const u8 *y, int n) { u32 i, d = 0; for (i = 0; i < n; ++i) d |= x[i] ^ y[i]; return (1 & ((d - 1) >> 8)) - 1; }

int crypto_verify_16(const u8 *x, const u8 *y) { return vn(x, y, 16); }

DFG for 'Grafo'

tmp4

Memory 1

getelementptr phi

Memory 4

Memory 2

getelementptr phi

16

phi

n

Const:vn

i.0

add

sext

sexticmp

tmp23

tmp9tmp14

tmp6

phi

0

phi

d.0

1

sub

and

sub

tmp26

tmp28

tmp29

or

tmp20

tmp18

xor

tmp12

load load

tmp17

ashr

tmp27

8

phi

FlowTracker's output

Code that psyche-c has reconstructed

DEPARTMENT OF COMPUTER SCIENCE UNIVERSIDADE FEDERAL DE MINAS GERAIS

FEDERAL UNIVERSITY OF MINAS GERAIS, BRAZIL

CHALLENGES

Parsing: Is T a Type or a Variable?

typedef int T;void foo() { T * a;}

typedef int T;void foo() { T * a; T b;}

int T;void foo() { T * a; x = T * b;}

int a, T;void foo() { T * a;}

1

2

3

(a) (b) (c)

void foo() { T * a;}

int T;void foo() { T * a; b + T;}

(d) (e) (f)

1

2

3

4

1

2

3

4

1

2

3

4

5

1

2

3

4

5

1

2

3

4

5

Challenge 1: Parsing

•  Problem: the syntactic nature of some constructs is determined by the semantics of declarations that might be missing. Example: what is T?

void foo() { T * a; }

•  Solution: use a syntax tree general enough to represent ambiguous constructions, and postpone parsing decisions until more syntax is available.

Type Inference: is T a pointer or an integer?

void foo() { T b; b = 0; b * 10;}

void foo() { T a; a = 0;}

1

2

3

4

(a) (b) (c)

void foo() { T c; c = 0; *c = 10;}

1

2

3

4

5

1

2

3

4

5

Type Inference: what is the type T?

struct X0 { int a, b, c, d;}

typedef struct X0 T;

void foo() { T v = {1, 2, 3, 4};}

1

2

3

1

2

3

4

5

struct X1 { int a, b;}

typedef struct X1 T[];

1

2

3

4

5 typedef int T[];1

void foo() { T v = {1, 2, 3, 4}; v.a = v.b = 0; v.c = v.d = 1;}

1

2

3

4

5

void foo() { T v = {1, 2, 3, 4}; v[0].a = 0; v[0].b = 1;}

1

2

3

4

5void foo() { T v = {1, 2, 3, 4}; v[0] = 0;}

1

2

3

4

Challenge 2: Type Inference

•  Problem: the same syntax can denote the types of different values. Example: what's the type T?

void foo() { T a; a = 0; }

•  Solution: use a lattice of possible types. The type of each expression navigates up this lattice, as more syntax becomes available, until converging to a fixed point.

Example of Type Lattice

s

l

n p

m

u

void foo() { T b; b = &b; b * 10;}

void foo() { T a; a = 0;}

void foo() { c = 10; *c = 10; c * 10;}

void foo() { T c; c = 10; *c = 10;}

1

2

1

2

1

2

(b)

(c) (a)

(d)

(e)

Orphans: what is each type Tx?

void f2() { T2 c = malloc(1); *c = 'a';}

void f1() { T1 d = malloc(8); *d = 9.9;}

1

2

3

4

(a) (b) (c)

void f3() { T3 v = malloc(4); *v;}

1

2

3

4

1

2

3

4

•  Problem: Generate types for variables whose nature is not sufficiently restricted by the program's syntax. Example: what's the type T?

void f() { T v = malloc(4); *v; }

•  Solution: define a family of orphan types. Orphan types are types general enough to accommodate the minimum available syntax, e.g., uintptr_t. There is no flow of information between variables in different families of orphan types.

Challenge 3: insufficient information

Meta-information: what is the size of type T?

int main(int argc, char** a) { T* v = get_new_array(argc); int i = 0; while (i < argc) v[i] = 0; i++; return 0;}

1

2

3

4

5

6

7 (a) (b)

T* get_new_array(int N) { size_t s = sizeof(T); T* w = (T*)malloc(N*s); return w}

1

2

3

4

5

•  Problem: recover the size of vector and struct types in a language that lacks type introspection.

int main(int argc, char** a) { T* v = get_new_array(argc); int i = 0; while (i < argc) v[i] = 0; i++; return 0;}

•  Solution: use static range analysis on symbolic intervals to associate size information with types, effectively creating a family of dependent types.

Challenge 4: lack of type introspection

DEPARTMENT OF COMPUTER SCIENCE UNIVERSIDADE FEDERAL DE MINAS GERAIS

FEDERAL UNIVERSITY OF MINAS GERAIS, BRAZIL

THE POWER OF PSYCHE-C

On-Line Interface

http://cuda.dcc.ufmg.br/psyche-c!

On-line Interface

•  Psyche-c can reconstruct the declarations of the files available in gnulib coreutils, e.g., – base64.c, basename.c, cat.c, chcon.c, chgrp.c, chmod.c, etc.c, etc.c., etc.c.

•  Sources available at: http://git.savannah.gnu.org/gitweb/?p=coreutils.git

Algorithms in C

•  Psyche-c can reconstruct all the examples of data structures used in Robert Sedgewick's classic book: Algorithms in C

•  More than 1,200 lines of code. •  Complex data structures: graphs, trees, linked-lists, hash-tables, etc.

Algorithms in C

void TCdfsR(Graph G, Edge e) { link t; G->tc[e.v][e.w] = 1; for (t = G->adj[e.w]; t != NULL; t = t->next) if (G->tc[e.v][t->v] == 0) TCdfsR(G, EDGE(e.v, t->v)); }

typedef struct link { int v; struct link *next; } *link;

typedef struct Graph { int **tc; struct link **adj; } *Graph;

typedef struct Edge { int v; int w; } Edge;

struct Edge EDGE(int, int);

Available Source

Reconstructed Source

Array Size Inference

•  Precise bounds for every array in PolyBench: – http://cavazos-lab.github.io/PolyBench-ACC/

•  Precise bounds for 75% of the benchmarks in the LLVM SingleSource benchmark suite.

•  Precise bounds for 34% of all the arrays used in SPEC CPU2006.

•  Automatic reconstruction of missing parts of C sources partially available.

•  Try it out: http://cuda.dcc.ufmg.br/psyche-c.

•  You might be impressed ☺

[email protected]!