################################################################################
#
#  magrittr pipes and loon
#  (and some maps and serial axes glyphs)
#
################################################################################
#
# R has two popular piping packages:
#      magrittr and pipeR
#
# magrittr is used here because it is part of the very popular tidyverse
# collection of packages.
#
# Similar piping can be done (possibly more easily) with the pipeR package,
# which also works well with the data.table package
# (a competitor to dplyr of the tidyverse).
#
#
# Packages ----
#
library(loon)
library(magrittr)
library(dplyr)
library(maps)
# If you have loaded tidyverse (with purrr), then wherever map appears
# in this script maps::map should be used instead.

#
# The data: "Canadian Visible Minority Data 2006"
# from the loon package; Source: Statistics Canada.
# See help("minority")
#
data("minority")
str(minority)
rownames(minority)
names(minority)
# The variables are the Statistics Canada names plus
# two different geolocation sources.
# NOTE: Google's has an error for our purposes.

########
#
#  PIPES:   %>%
#
#  A pipe expresses the flow of data from one piece to the next.
#  With the %>% pipe, the output/value of one form
#  becomes the FIRST argument of the next.
#
#  Below, each visible minority count is divided by the city's total
#  population, and the (long) Statistics Canada names are shortened.
#  The result is forward-assigned to minorityProportion.
#
minority %>%
  transmute(
    Arab     = Arab / Total.population,
    Black    = Black / Total.population,
    Chinese  = Chinese / Total.population,
    Filipino = Filipino / Total.population,
    Japanese = Japanese / Total.population,
    Korean   = Korean / Total.population,
    Latino   = Latin.American / Total.population,
    Multiple = Multiple.visible.minority / Total.population,
    S.Asian  = South.Asian / Total.population,
    SE.Asian = Southeast.Asian / Total.population,
    W.Asian  = West.Asian / Total.population,
    Other    = Visible.minority.not.included.elsewhere / Total.population,
    Minority = Visible.minority.population / Total.population
  ) -> minorityProportion
#
# This gives each visible minority as a proportion of the total population
# and shortens the names.
#
# Another data set of interest might be
# each visible minority as a proportion of the total visible minority
# population of the city.
#
minority %>%
  transmute(
    Arab     = Arab / Visible.minority.population,
    Black    = Black / Visible.minority.population,
    Chinese  = Chinese / Visible.minority.population,
    Filipino = Filipino / Visible.minority.population,
    Japanese = Japanese / Visible.minority.population,
    Korean   = Korean / Visible.minority.population,
    Latino   = Latin.American / Visible.minority.population,
    Multiple = Multiple.visible.minority / Visible.minority.population,
    S.Asian  = South.Asian / Visible.minority.population,
    SE.Asian = Southeast.Asian / Visible.minority.population,
    W.Asian  = West.Asian / Visible.minority.population,
    Other    = Visible.minority.not.included.elsewhere /
      Visible.minority.population
  ) -> minorityProportionVisible

# These data can be used to make a histogram of the percent
# visible minorities in 33 Canadian cities in 2006.
# (The proportion is multiplied by 100 to give a percent.)
#
l_hist(
  100 * minorityProportion$Minority,
  title = "Visible minority % in 33 Canadian cities (2006)",
  xlabel = "percent of population",
  linkingGroup = "minority",
  yshows = "frequency",
  origin = 0,             # bin edge
  binwidth = 1,
  showBinHandle = FALSE,
  showGuides = TRUE,
  showScales = TRUE
)

#
# Or another one built with a pipe
# (a gratuitous use of pipes)
# and a forward assignment (which preserves the flow metaphor).
#
minority %>%
  with(
    l_hist(
      Total.population / 1e6,      # population in millions
      title = "2006 Population for 33 Canadian cities",
      xlabel = "Total population (in millions)",
      binwidth = 0.1,              # bins are 100,000 people wide
      showGuides = TRUE,
      showScales = TRUE,
      linkingGroup = "minority"
    )
  ) -> hTotPop

#
# EXPLORE:
#   Select bars in either histogram to see how
#   population and percent visible minority connect
#   on Canadian cities in 2006.
#
#
########
#
#  T PIPES:
#
# A pipe, %>%, just connects computational forms by
# passing the OUTPUT of one on to be the INPUT TO the next.
#
# A "T pipe", %T>%, feeds the previous result to the form,
# BUT has the INPUT pass on to be the OUTPUT FROM the next form.
#
# METAPHOR:  Think of a T-connection in plumbing.
#
# This comes up in loon where many functions operate on a loon plot
# but do NOT return a loon plot.
# They might, for example, return a layer.
#
# Here is a somewhat artificial (and overly documented) example where
# T pipes are used to push the l_hist() through
# to the end, rather than the result of any
# intermediate form.
#
minorityProportionVisible %>%
  select(Multiple) %>%
  transmute(Multiple = 100 * Multiple) %>%
  l_hist(
    title = "Multiple minority % of all minorities",
    xlabel = "percent of visible minority",
    origin = 0,             # bin edge
    binwidth = 1,
    showGuides = TRUE,
    showScales = TRUE,
    color = "skyblue"
  ) %T>%
  # The T pipe above makes sure the l_hist is passed on
  # AFTER the next form is evaluated with the l_hist as input.
  #
  # E.g. somewhat artificially, we might be
  # joining a linking group AFTER our plot is constructed
  # but still want to PUSH our linked states to the rest
  # of the group.
  #
  # This requires an l_configure with 2 arguments to achieve
  # the synchronization.
  #
  # Rather than the value of the following l_configure()
  # moving on to the next form, the above T pipe
  # ensures that the l_hist is moved on to the form
  # after the l_configure().
  #
  l_configure(linkingGroup = "minority", sync = "push") %T>%
  #
  # This second T pipe (above) ensures that the l_hist,
  # in addition to being the input to the l_scaleto_plot(),
  # will also be pushed on after the l_scaleto_plot() and
  # so on to be assigned to the variable hMultiple.
  #
  # The forward assignment needs an explicit target; without one the
  # histogram handle is never bound to a name.
  l_scaleto_plot() -> hMultiple

#
#
########
#
#  A more complex (and less gratuitous) example.
#
#  Here we will build up an interactive map of Canada to allow
#  us to explore the spatial relations (if any) between the
#  collection of minorities and their geolocations.
#
#  MULTIVARIATE glyphs will be used.
#
#######
#
# First get some maps
# (though these could have been done in place in the pipeline)
#
# NOTE
#   Getting the map of Canada plus the lakes has
#   become more complicated with recent releases
#   of the maps package.
#
#   World lakes now appear separately from countries.
#
# Getting a map of Canada is easy.
#
# maps::map is called with its namespace throughout so that the calls
# still reach the maps package when purrr (tidyverse) is attached and
# its own map() masks this one.
Canada.land <- maps::map("world", "Canada", plot = FALSE, fill = TRUE)

# However, to make a more complex pipeline constructing the final map
# (and just because a map of Canada looks weird without its large lakes)
# we will build a more complex (and accurate) map.
#
# TODO
#   - First we need to find the lake names and then select those that
#     are in Canada.
#
#   - Use the names to select the lake map coordinates.
#
#   - Build a pipeline to construct the interactive map.
#
#
#
# The lake names (needed to extract their maps):
all.lake.names <- maps::map("lakes", plot = FALSE, fill = TRUE)$names

#
# The Canadian lakes and details.
# (Had to actually identify these by hand!!!)
# The indices below are positions in all.lake.names as returned by the
# call above.
# NOTE(review): index 10 appears both in CDN.lake.names (7:10) and in
# CDN.boundary.island.names -- confirm which group it belongs to.
CDN.lake.names <- all.lake.names[
  c(7:10, 22, 25, 27, 33, 68:73, 80, 82, 85)
]
CDN.interior.island.names <- all.lake.names[
  c(23:24, 26, 74, 81, 83:84)
]
CDN.boundary.island.names <- all.lake.names[
  c(10, 28, 30, 53, 56:58, 60, 64:66)
]
CDN.lake.island.names <- c(CDN.interior.island.names,
                           CDN.boundary.island.names)
#
# Some islands in boundary lakes are US territory.
US.boundary.island.names <- all.lake.names[
  c(29, 31, 45:52, 54, 55, 59, 61:63)
]

#
# Extract the lake data map coordinates
#
CDN.lakes <- maps::map("lakes", CDN.lake.names,
                       plot = FALSE, fill = TRUE)
CDN.lake.islands <- maps::map("lakes", CDN.lake.island.names,
                              plot = FALSE, fill = TRUE)
US.lake.islands <- maps::map("lakes", US.boundary.island.names,
                             plot = FALSE, fill = TRUE)

#
# Choose some colours
#
landcol <- "cornsilk"
lakecol <- "steelblue"
UScol <- "grey50"   # 50 states? 50 shades?
#
# The base scatterplot: the geolocations of the
# 33 Canadian cities (longitude against latitude),
# with each point labelled by its city (the row names of minority).
#
p_map <- with(
  minority,
  l_plot(
    long, lat,
    xlabel = "longitude",
    ylabel = "latitude",
    itemLabel = rownames(minority),
    showItemLabels = TRUE,
    showLabels = FALSE,
    linkingGroup = "minority"
  )
)

#
# EXPLORE
#   - Identify cities on the map by using item labels.
#   - Connect cities to the histograms by brushing.
#
# Multivariate considerations (foreshadowing):
#   For reasons that will be apparent momentarily,
#   an ordering of the variables is chosen (rough grouping).
#
varNameOrder <- c(
  "Arab", "Black", "Latino", "Multiple",
  "Japanese", "Korean", "Chinese",
  "SE.Asian", "Filipino", "Other",
  "S.Asian", "W.Asian"
)

#
# NOTE
#   As the pipeline is built, layers have to be organized sensibly.
#   Namely:
#     - islands on lakes
#     - lakes on map
#     - cities (scatterplot) on map
#
########
#
# The interactive map pipeline
#
# Every step but the last is joined with a T pipe, so p_map itself
# (not the value of the intermediate form) is what flows onward.
#
p_map %T>%
  l_layer(
    CDN.lake.islands,
    label = "CDN lake islands",
    color = landcol,   # Same colour as the land of the Canada map
    index = "end"      # Put these at the bottom of the layers
  ) %T>%
  l_layer(
    US.lake.islands,
    label = "US lake islands",
    color = UScol,     # Uses a different colour to distinguish US
    index = "end"      # Put these at the bottom of the layers
  ) %T>%
  l_layer(
    CDN.lakes,
    label = "Lakes",
    color = lakecol,
    index = "end"      # Put these at the bottom, below the islands
  ) %T>%
  l_layer(
    Canada.land,
    label = "Canada",
    color = landcol,
    index = "end"      # Put the land map below all
  ) %T>%
  l_scaleto_world() %T>%
  # Add some glyph options
  #
  #    - city names as possible glyphs
  l_glyph_add_text(
    text = row.names(minority),
    label = "city names"
  ) %T>%
  #    - serialaxes as possible glyphs (different data)
  l_glyph_add_serialaxes(
    # Proportion compared to total visible minority in city
    data = minorityProportionVisible,
    sequence = varNameOrder,
    # Observation scaling:
    #     scales variable by maximum of each city
    scaling = "observation",
    linewidth = 2,
    showArea = TRUE,
    label = "Minority based"
  ) %T>%
  #    - serialaxes as possible glyphs (different data)
  l_glyph_add_serialaxes(
    # Proportion compared to total population in city
    # Ignore the total minority proportion in last column
    data = minorityProportion[, -ncol(minorityProportion)],
    sequence = varNameOrder,
    # Variable scaling:
    #     scales each variable by maximum over all cities
    #     for that variable.
    scaling = "variable",
    linewidth = 2,
    showArea = FALSE,
    label = "Population based"
  ) %>%
  l_configure(size = 10, color = "firebrick", glyph = "glyph0")

#
# NOTE:
#   - the points in the scatterplot now appear as the city names.
#   - In the map (scatterplot) inspector, near the bottom,
#        "city names, glyph 0"
#     appears as a glyph choice
#
#
# TRY:
#   On the map inspector:
#
#   1. Select all the points
#        - on the glyph menu near the bottom
#          click (and hold) on "city names, glyph 0"
#          to make a menu pop up
#        - choose "Minority based, glyph 1"
#        - press set
#        - deselect all
#
#      Each city is replaced by a shape
#
#
#   On the map:
#
#   2. Zoom and pan to explore how these shapes
#        - vary from city to city
#        - relate to their geographic location (if at all)
#
#      These glyphs show the DISTRIBUTION of visible minorities
#      within a city (compared to the city's largest visible minority)
#
#      Observation scaling means that
#      the largest minority in each city will have the largest value (1)
#      and the smallest will have the smallest value (0).
#
#
########
#
#  Key to glyphs:
#     Interpretation of glyphs will be easier if we match them
#     with their own displays.
#
#     Start with the same scaling as the map.
#
#
# The first (as set up) shows the distribution
# of the minorities, scaling within city.
sa_visible <- l_serialaxes(
  minorityProportionVisible,
  sequence = varNameOrder,
  scaling = "observation",
  title = "Minority based",
  itemLabel = row.names(minority),
  showItemLabels = TRUE,
  color = "firebrick",
  linewidth = 3,
  linkingGroup = "minority"
)

# The second (as set up) shows the proportions for each minority
# as it compares across the country.
# Serial axes display of the population-based proportions (the last
# column, the total minority proportion, is dropped), using variable
# scaling to match the "Population based" glyph on the map.
#
# It is bound to its own name, sa_population, so that the handle
# sa_visible of the "Minority based" display is not overwritten.
sa_population <- l_serialaxes(
  minorityProportion[, -ncol(minorityProportion)],
  sequence = varNameOrder,
  scaling = "variable",
  title = "Population based",
  itemLabel = row.names(minority),
  showItemLabels = TRUE,
  color = "firebrick",
  linewidth = 3,
  linkingGroup = "minority"
)

# EXPLORE:
#
#   1. Characteristics of different cities and regions
#
#   2. Change the map to be whichever glyph is of interest
#
#   3. Change the glyph parameters on the map by
#        - selecting the "Glyphs" tab in the map inspector
#        - selecting the glyph you want to change
#        - change the glyph parameters for that glyph
#          (all points will have the same set of parameters)
#
#      Make sure you update the parameters of the corresponding
#      serial axes display to allow interpretation.
#
#
#
################################################################################