Visual Exploration of Large Data sets with D3, crossfilter and dc.js
-
Upload
florian-georg -
Category
Data & Analytics
-
view
2.652 -
download
3
Transcript of Visual Exploration of Large Data sets with D3, crossfilter and dc.js
Software Solution Architect @ IBM
Software Delivery - practices and tools; Software-driven Product Innovation; Agile, Lean and Design Thinking; Cloud development (IBM Bluemix); Analytics, Systems and IoT
http://bluemix.net
https://internetofthings.ibmcloud.com/
not just static reporting
not just basic charting
not a full-fledged BI suite
even on larger data sets
Data-Driven Documents
http://d3js.org/
https://github.com/mbostock/d3/wiki/Gallery
http://bl.ocks.org/mbostock
(... lots of!)
http://bl.ocks.org/mbostock/1005873
http://bl.ocks.org/mbostock/2706022
http://bl.ocks.org/mbostock/10024231
http://bl.ocks.org/mbostock/9539958
vartheData=["Jazoon","D3","IBM","Bluemix"];
varp=d3.select("body").selectAll("p")//DOMelementselector.data(theData)//join/bindtodata.enter()//foreach"new"datapoint.append("p")//...addsomeDOMelements.text(function(d){return"Hello"+d;}).style("background-color","steelblue");
<html><body><script src="http://d3js.org/d3.v3.min.js">
http://bost.ocks.org/mike/join/
functionredraw(){varcircle=svg.selectAll("circle").data(data);//update-existingcirclesmovetoanewrandomfillcolorcircle.transition().attr("fill",function(d){returngetRandomColor();}).duration(750).delay(10);
//enter-createcirclewitharandomcolorcircle.enter().append("circle").attr("cx",function(d){returnd.x;}).attr("cy",function(d){returnd.y;}).attr("r",function(d){returnd.r;}).attr("fill",function(d){returngetRandomColor();});//exit-transitionradiustowardszero,thenremovecircleelementcircle.exit().transition().attr("r",0).duration(750).remove();}
http://jsbin.com/yubuz/13/edit
vardata=[0,10,20,30,40,50,60,70,80,90,100];
varnodesLinear=d3.select("div#linear").selectAll("p");
//Scalesmapdomainintoarange.//domains/rangescanbenumbers,colors,timestampsetc.//D3supportsvariousscalesandinterpolatorsforthemapping,//andyoucanwriteyourown...varcolor=d3.scale.linear().domain([0,Math.max.apply(Math,data)]).range(["yellow","red"]);
nodesLinear.data(data).enter().append("p").text(function(d){return"linearinterpolation"+d+"==>"+color(d);}).style("background-color",color);
http://jsbin.com/segoyu/3/edit
//makesureyouunderstandthedatastructurethatis//expectedbythelayoutvarroot={children:[{children:[{value:42},{value:10,omit:1}]},{value:42},{value:10,omit:1}]};
varwidth=480,height=300;varsvg=d3.select("body").append("svg").attr("width",width).attr("height",height)
//packlayout(canbeusedforbubblechartsetc.)varpack=d3.layout.pack().size([width,height]);
svg.selectAll("circle")//bindtoyourdata,"decorated"bythelayout..data(pack.nodes(root).filter(function(d){return!d.omit;})).enter().append("circle")//thelayoutknowswheretopositionelements,//andaddspositioningattributeslike"x","y"etc.toyourdata//(readthedocstoavoidsurprises).attr("cx",function(d){returnd.x;}).attr("cy",function(d){returnd.y;}).attr("r",function(d){returnd.r;});
http://jsbin.com/dequma/8/edit
functionupdate(data){
//DATAJOINvartext=svg.selectAll("text").data(data,function(d){returnd;});
//UPDATEtext.attr("class","update").transition().duration(750).attr("x",function(d,i){returni*32;});
//ENTERtext.enter().append("text").attr("class","enter").attr("dy",".35em").attr("y",-60).attr("x",function(d,i){returni*32;}).style("fill-opacity",1e-6).text(function(d){returnd;}).transition().duration(750).attr("y",0).style("fill-opacity",1);
//EXITtext.exit().attr("class","exit").transition().duration(750).attr("y",60).style("fill-opacity",1e-6).remove();} http://jsbin.com/pezagi/2/edit
Pure HTML5 / JavaScript; Supports SVG and Canvas; Very flexible; Mature project; Excellent documentation; Wide adoption
learning curve; quite low-level; Integration with UI Frameworks (JQuery, Angular ...); not scaling well to large data sets
http://bl.ocks.org/mbostock/4063663
http://bl.ocks.org/mbostock/7586334
http://square.github.io/crossfilter/
//Asmallsampleoftheclassiccarsdatasetvarcsv="name,economy(mpg),cylinders,displacement(cc),power(hp),weight(lb),0-60mph(s),year\n"+"AMCAmbassadorBrougham,13,8,360,175,3821,11,73\n"+"AMCAmbassadorDPL,15,8,390,190,3850,8.5,70\n"+"AMCAmbassadorSST,17,8,304,150,3672,11.5,72\n"+"AMCConcordDL6,20.2,6,232,90,3265,18.2,79\n"+"AMCConcordDL,18.1,6,258,120,3410,15.1,78\n"+"AudiFox,29,4,98,83,2219,16.5,74\n"+"BMW2002,26,4,121,113,2234,12.5,70\n"+"BMW320i,21.5,4,121,110,2600,12.8,77\n"+"BuickCentury350,13,8,350,175,4100,13,73\n"+"BuickCenturyLimited,25,6,181,110,2945,16.4,82\n"+"ChryslerNewportRoyal,13,8,400,190,4422,12.5,72\n"+"CitroenDS-21Pallas,6,4,133,115,3090,17.5,70\n"+"Datsun1200,35,4,72,69,1613,18,71";
varcf=crossfilter(data);
varbyCylinders=cf.dimension(function(p){returnp.cylinders;});
//createagroupspernumberofcylindersvargroupByCylinders=byCylinders.group();
varvisCylinders=d3.select("body").select("div#byCylinders").selectAll("div").data(groupByCylinders.top(Infinity))//data:allelementsinsidegroup.enter().append("div").text(function(d){//groupis'key','value'dependsonthereducefunction(default:count)return"Carswith"+d.key+"cylinders:"+d.value;});
http://jsbin.com/biruro/6/edit
varcf=crossfilter(data);varbyName=cf.dimension(function(p){returnp.name;});
vargroupByMeanPower=//MAP:createagroupperbrandname(e.g."BMW")byName.group(function(d){returnd.substring(0,d.indexOf(""));})//REDUCE:incrementallycalcaveragepowerinsideeachgroup.reduce(//addfunction(p,v){p.totalPower+=+v["power(hp)"];p.count++;p.avg=(p.totalPower/p.count);returnp;},//removefunction(p,v){p.totalPower-=+v["power(hp)"];p.count--;p.avg=(p.totalPower/p.count);returnp;},//initfunctioninit(){return{totalPower:0,count:0,avg:0};})//ordergroupusingtheresultingaveragefromthereducestep.order(function(d){returnd.avg;});
http://jsbin.com/pubaz/2/edit
http://square.github.io/crossfilter/
Coordinated views on data ("brushing"); Incremental filtering; Extremely fast for histograms, top-n lists etc.
Dimensional data (not time series, continuous); One filter per dimension; slow write / fast read operations; Few examples out there; Load & Explore (e.g. not "real time analysis"); Dimensions are expensive (32 max); Doesn't like NaN
http://dc-js.github.io/dc.js/
//[...]vardateDim=crossfilter.dimension(function(d){returnd.date;});varhits=dateDim.group().reduceSum(function(d){returnd.total;});varminDate=dateDim.bottom(1)[0].date;varmaxDate=dateDim.top(1)[0].date;
varstatus_200=dateDim.group().reduceSum(function(d){returnd.http_200;});varstatus_302=dateDim.group().reduceSum(function(d){returnd.http_302;});varstatus_404=dateDim.group().reduceSum(function(d){returnd.http_404;});
dc.lineChart("#chart-line-hitsperday").width(500).height(200).dimension(dateDim).group(status_200,"200").stack(status_302,"302").stack(status_404,"404").renderArea(true).x(d3.time.scale().domain([minDate,maxDate])).brushOn(false).legend(dc.legend().x(50).y(10).itemHeight(13).gap(5)).yAxisLabel("Hitsperday");
http://jsbin.com/zapupe/4/edit
http://dc-js.github.io/dc.js/
JSON is verbose; server-side preprocessing; dimension.dispose()
DC uses SVG (not Canvas); multiple "simple" visualizations > one "complex"
Demo, slides, links etc: http://datavisual.mybluemix.net
http://www.ibm.com/developerworks/