################################################################################
#
#
#     Example:  New Zealand Covid cases -- May 2020.
#
#
################################################################################
#
#
#
# This data is available from the loon.data package (release >= 0.1.0)
#
# Note: there are several data sets in loon.data
#     l_web(package = "loon.data", directory = "reference")
#
# If you installed the diveR package, attaching it
# will attach all loon related packages at once.
# It is optional, so it is only attached when it is actually installed.
if (requireNamespace("diveR", quietly = TRUE)) {
  library(diveR)
}
# Otherwise, we will attach the packages one at a time as needed.
#
library(loon.data)
#
# Access the data
#
data("covidNZ", package = "loon.data")
#
# To get some idea of the contents, we can ask for its
# variable names
#
names(covidNZ)
#
# The detailed contents can be seen in spreadsheet form
# in RStudio using
#
View(covidNZ)
#
# Examining its data structure
#
str(covidNZ)
#
# Everything seems to be a character vector.
#
# We can formally change these to reflect their true values
#
# Sex, Age, and Last_country_visited could be factors
#
covidNZ$Sex <- as.factor(covidNZ$Sex)
covidNZ$Age <- as.factor(covidNZ$Age)
covidNZ$Last_country_visited <- as.factor(covidNZ$Last_country_visited)
#
# There are three dates in the form "year-month-day".
# For plotting purposes, we turn these into formal `Date` objects
#
covidNZ$Case_date <- as.Date(covidNZ$Case_date)
covidNZ$Flight_departure_date <- as.Date(covidNZ$Flight_departure_date)
covidNZ$Arrival_date <- as.Date(covidNZ$Arrival_date)
#
# The data structure can be seen in detail:
#
str(covidNZ)
#
# As can be seen, now there are different data types:
#    - "Date",
#    - "character", and
#    - "Factor"
#
# Note:
#    - None are simply "numeric"
#    - Also lots of missing values: NA or ""
#
# Each row corresponds to an individual person
# with a confirmed case of SARS-CoV-2 (or COVID-19 or ...)
#
# More details on the data are available via help.
#
help(covidNZ)
#
# A quick statistical summary of the contents
#
summary(covidNZ)

########################################
#
#   interactive exploration with loon
#   (release >= 1.3.6)
#
########################################
#
#
# First load loon
#
library(loon)
# The R manual for loon is available online:
#
l_web()
#
# The most common display of data like this is a
# bar chart showing the number of cases daily.
#
# The data is found on the variable Case_date
# of covidNZ in year-month-day format:
#
head(covidNZ$Case_date)
# with most recent cases at the top.
#
#
########################################
#
#   Histograms in loon:   l_hist()
#
########################################
#
# The result is deliberately NOT assigned here: the tutorial later
# recovers the plot from the path shown in its window title bar.
l_hist(covidNZ$Case_date)
#
# which produces
#    1. a window showing the histogram
#    2. something called a loon inspector
#    3. a value returned and printed in the R Console pane.
#
# NOTE:
#   The windows can be moved like any other window on your screen.
#   ... typically by grabbing the title bar of the window.
#
#   You will want to move the window containing the histogram away
#   from the inspector.
#
#
# Observation:
#   From the histogram alone the incidence pattern of covid-19 cases
#   early in the pandemic in New Zealand follows the now typical wave.
#
# TRY:  With your mouse on the INSPECTOR observe what happens when you
#
#    1. check and uncheck the "swap" box
#    2. check and uncheck the "guides" box
#    3. check and uncheck the "scales" box
#       Note that the date is days since some origin (1970-01-01)
#    4. uncheck and check the "labels" box
#    5. uncheck and check the "stacked colors" box
#    6. uncheck and check the "outlines" box
#
#
#
#
#
# TRY:  With your MOUSE
#
#  On the INSPECTOR
#
#    1. check the "bin handle" box.
#       What happened?
#
#  On the HISTOGRAM WINDOW
#
#    2. press your mouse on the triangle appearing at the bottom
#       left of the histogram window and
#       HOLDING THE MOUSE BUTTON,
#       DRAG THE TRIANGLE HORIZONTALLY TO THE RIGHT
#
#       What happens?
#          - in the histogram window?
#          - in the inspector?
#
#    3. press your mouse on the BOX appearing at the bottom
#       left of the histogram window and
#       HOLDING THE MOUSE BUTTON DRAG THE BOX HORIZONTALLY
#
#       What happens?
#          - in the histogram window?
#          - in the inspector?
#
#       What do the box and triangle of the bin handle define?
#
#
#  On the INSPECTOR
#
#  NOTE(review): the key names in steps 5, 6 and 7 were missing from this
#  text (they look like stripped <...> tokens).  They are restored below
#  as <SHIFT> and <CTRL> (or <CONTROL> or <COMMAND>), which is an
#  assumption -- confirm against the loon documentation.
#
#    4. Click and hold your mouse on the "World View",
#       and move it around.
#
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#    5. Repeat 4 but hold the <SHIFT> key down at the SAME TIME
#
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#    6. Repeat 4 but, at the SAME TIME, hold the <CTRL>
#       (or <CONTROL> or <COMMAND>) key down
#
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#  On the HISTOGRAM WINDOW
#
#    7. On the background of the plot,
#       with the RIGHT mouse button continually pressed:
#          - move the mouse
#          - hold the <SHIFT> key and at the SAME TIME
#            move the mouse (RIGHT button pressed)
#          - hold the <CTRL> (or <CONTROL> or <COMMAND>) key
#            and at the SAME TIME
#            move the mouse (RIGHT button pressed)
#
#       What happens?
#          - in the histogram window?
#          - in the inspector?
#
#       Try SCROLLING while on:
#          - on the histogram
#          - on the world view
#
#  On the INSPECTOR
#
#    8. Press the PLOT button beside "scale to:" in the inspector.
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#    9. Beside "yshows:" in the inspector, select "density"
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#       Check the "scales" box and switch back and forth
#       between "frequency" and "density"
#
#
#
#
#  On the HISTOGRAM WINDOW
#
#   10. With "density" selected in the inspector,
#       again move the ends of the bin handle around.
#
#       Vary the bin size from small to large.
#
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#       Choose a "sensible" bin size AND
#       when you are done:
#          - uncheck the "scales" box
#          - uncheck the "bin handle" box.
#
#
#
#   11. SELECT any bar LEFT of the histogram's mode
#       and colour it light blue.
#       (Press the "none" button beside "static:" to see the result,
#        OR click the mouse on the background of the histogram.)
#
#       SELECT any bar RIGHT of the histogram's mode
#       and colour it dark blue.
#
#       SELECT the 3 TALLEST bars AT THE SAME TIME
#       and colour them red.
#          - can you guess how this might be done?
#          - is there more than one way?
#
#       What does the <SHIFT> key do?
#
#
#   12. Again check the "bin handle" and move the ends of
#       the bin handle around.
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#
#   13. Select all pieces of the histogram (at the same time)
#       that are red.
#
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#       Make this selection in TWO different ways:
#          - from the inspector
#          - on the histogram only
#
#
#
#  On the INSPECTOR
#
#   14. Uncheck and check the "stacked colors" box.
#
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#   15. Select the "brushing" radio button.
#
#       What happens in the histogram window?
#          - this is called the "brush"
#
#       Go to the histogram window
#          - move the mouse with the LEFT button pressed
#          - release the mouse button, then press it down
#            on the small square at the bottom right of the
#            brush and (holding it down) reshape the brush.
#            (reshaping ends when the mouse button is released)
#          - move the reshaped brush around
#            (try pressing the <SHIFT> key at the same time)
#
#       This is called "brushing".
#       With the <SHIFT> key depressed the selection is "sticky"
#       (as if painting with the brush).
#
#   16. Select the "sweeping" radio button to return to
#       the usual selection.
#       Click on the NONE button to deselect all selections
#
#       Go to the histogram window
#          - LEFT click on the background of the histogram,
#            hold the mouse down and move it.
#
#       What happens?
#          - try again with the <SHIFT> key down
#
#       This is called "sweeping"
#       (as in a broom clearing space, the rectangle is swept out)
#
#   17. Select "by color" one of the colours in the histogram.
#       Select the "deactivate" button.
#
#       What happens?
#          - in the inspector?
#          - in the histogram window?
#
#       Repeat the above with another colour.
#
#       Select the "reactivate" button and scale to "plot".
#
#
#
##########
#
#  SLIDES:  Summary of inspector and mouse movements
#
#########
#
#
#
#
#########
#
#  QUESTION:  How does the value returned in the Console
#             relate to the window showing the histogram?
#
#
#
#
#
#  ANSWER:
#    In the window title bar,
#    appearing after "path: " is the string ".l0.hist"
#
#    Try this
#
l_getFromPath(".l0.hist")
# The string following "path:" in the window UNIQUELY IDENTIFIES the
# loon plot.  (Note the "." at the beginning.)
# NOTE: If you accidentally made more than one histogram, the string
#       would be slightly different (the number would change).
#
# l_getFromPath() returns a pointer to the loon plot (which is in TCL)
#
# This lets us save the plot structure on an R variable
#
hcases <- l_getFromPath(".l0.hist")
hcases
# Although just a string with a class attribute in R, the loon plot
# is actually a rich data structure.
#
# For example, with this pointer we can take a static R plot
# "snapshot" of the current view of the histogram
plot(hcases)
# Note that the "bin handle" (if still on) does not appear in
# the R plot.
#
# This plot can be exported, zoomed, etc. like any other R plot.
#
# TRY:
#    Interactively change some features of the loon histogram
#    (either in the histogram window itself, or through the inspector).
#    Then call plot() again.
#
plot(hcases)
# You can do this any time you want a static snapshot of the current display.
#
#
# NOTE:
#    The result is actually a grid graphic structure
g_hcases <- plot(hcases, draw = FALSE)
g_hcases
# This is actually a very rich data structure which could be used
# in other grid graphics.
# (Each call of plot() produces a different grid structure.)
# For example, you could explore the grid structure with
View(g_hcases)

########
#
# Many of a loon histogram's features are accessible from R.
#
# To see what these are
names(hcases)
# These can be accessed and changed using R's [.] notation.
#
# For example,
hcases["showGuides"]
hcases["xlabel"]
# which can be changed PROGRAMMATICALLY by assignment
hcases["showGuides"] <- FALSE
hcases["showGuides"] <- TRUE
hcases["showScales"] <- TRUE
hcases["showScales"] <- FALSE
hcases["showBinHandle"] <- TRUE
hcases["showBinHandle"] <- FALSE
hcases["xlabel"] <- "case date"
hcases["title"] <- "New Zealand Covid 19 cases"
plot(hcases)
# The most interesting features relate to the data.
#
hcases["n"]         # the number of points/cases appearing in the plot
                    # cannot be changed and so
                    # does NOT appear in names(hcases)
hcases["x"]         # the data itself (as a numeric vector)
hcases["selected"]  # which cases are selected (a logical vector)
hcases["active"]    # which cases are active (a logical vector)
hcases["color"]     # colours of the cases (a character vector)
                    # (in TCL colours are 12 character hexadecimal)
#
# These too can be changed
hcases["selected"] <- FALSE   # deselect all cases
# randomly activate/deactivate cases
hcases["active"] <- sample(c(TRUE, FALSE),
                           size = hcases["n"],  # number of cases
                                                # (cannot be changed)
                           replace = TRUE)
# repeat the above as often as you like
#
hcases["active"] <- TRUE      # activate all cases
# randomly assign colours from a list of three
hcases["color"] <- sample(c("pink", "skyblue", "lightgreen"),
                          size = hcases["n"],  # number of cases
                                               # (cannot be changed)
                          replace = TRUE)
# repeat the above as often as you like
#
# randomly assign colours from first 10 loon's default colours:
hcases["color"] <- sample(l_getColorList()[1:10],
                          size = hcases["n"],  # number of cases
                                               # (cannot be changed)
                          replace = TRUE)
# repeat the above as often as you like
#
# To find out how many unique colours you have
#
length(unique(hcases["color"]))
hcases["binwidth"] <- 7       # Note all bin widths are IDENTICAL always
                              # Here each bin is a week (7 days)
hcases["color"] <- "skyblue"  # set all cases to the same colour
# Might have to
# rescale the histogram (after resetting the binwidth)
l_scaleto_plot(hcases)
plot(hcases)
# More detailed bin information is also available.
# For example
l_binCut(hcases)     # returns a (factor) vector of the bin definition
                     # for each of the active points
l_getBinIds(hcases)  # returns the list of bins with
                     # the row numbers of the active x
                     # in each bin
# which can be used to identify bins,
# e.g. by colour
hcases["active"] <- TRUE   # makes sure all cases are active first
hcases["color"] <- l_binCut(hcases)
# NOTE:
#   - unique values of l_binCut(hcases) were used to select colours
#
plot(hcases)

########
#
# Adding a second histogram
#
hsex <- l_hist(covidNZ$Sex,
               title = "NZ Covid Cases",
               xlabel = "Sex",
               linkingGroup = "CovidNZ")
# NOTE:
#   - the histogram is assigned to a variable at creation (a good habit)
#   - Sex is a factor, so in R this would be a barplot
#   - "linkingGroup" is an arbitrary string to identify
#     a group of plots which will SHARE some display characteristics
#   - the inspector is now focused on the new histogram (see "World View")
#   - note that the name of the linking group appears in the inspector
#   - as elsewhere in the world, more females have tested positive than males
#
#
#
# TRY:
#   Click on the original histogram window (hcases) to bring it
#   to the top and make it the focus of the inspector.
#
#  On the INSPECTOR (focused on hcases)
#
#    1. Go to the "linking group" pull down menu (down arrow)
#       Select "CovidNZ"
#       WHEN PROMPTED with "Synchronize .l0.hist?", SELECT "Push"
#          - What happened?
#          - What would have happened had you selected "Pull"?
#
#
#  On the Sex HISTOGRAM WINDOW
#
#    2. Select all of the "Female" bar
#       What is the selected part of the Cases histogram showing?
#
#       Colour the selection pink
#       (it's OK and will be easier to remember :-)
#       Turn off the selection.
#
#    3. Select all of the "Male" bar
#       What is the selected part of the Cases histogram showing?
#
#       Colour the selection light blue
#       (again, it's OK and will be easier to remember :-)
#       Turn off the selection.
#
#       COLOUR THE Males grey again
#
#       COLOUR THE Males light blue again
#
#
#  On the INSPECTOR (focused on either histogram)
#
#    4. Alternating between selecting the pink cases
#       and selecting the light blue cases,
#       OBSERVE the effect in the cases histogram.
#
#       Statistically,
#          - what is being compared by this switching?
#
#       Deselect all cases.
#
#    5. Select the males only.
#       Deactivate them
#
#       Statistically,
#          - what do each of the histograms now display?
#
#       Reactivate all cases.
#
#
#  On the HISTOGRAM WINDOW for the cases
#
#    6. Select the first FOUR (left most) bars in the histogram.
#
#       Statistically,
#          - what is now being shown by the selection in the histogram for Sex?
#
#
#  On the INSPECTOR
#
#    7. With the selection just made
#       Invert the selection
#       Deactivate the new selection.
#       Rescale BOTH histograms to the "plot"
#
#       Statistically,
#          - what do the data suggest about sex distribution
#            early in the pandemic in New Zealand?
#          - how many cases is this plot based on?
#
sum(hcases["active"])
#       Reactivate all cases and rescale both histograms.
#
#    8. REPEAT steps 6 and 7 BUT this time focus on the
#       last (right most) 5 bars.
#
#
#    9. Make the binwidth of hcases 2
#
hcases["binwidth"] <- 2
#       FOCUS the inspector on the HISTOGRAM WINDOW for the cases
#
#       If you haven't already, reactivate all cases and
#       rescale both histograms to the "plot"
#
#       Select "all" in the inspector and
#       colour all cases a light yellow
#
#
#   10. Select "brushing" from the inspector.
#
#       Reshape the brush so that it is as tall as the height of
#       the histogram window (interior) and as wide as
#       the width of 4-5 histogram bars (> 4 but < 5)
#
#       Beginning at the left of the CASES histogram,
#       brush the histogram bars from left to right and
#       OBSERVE the effect on the SEX histogram.
#
#       Statistically,
#          - what is being observed in the SEX histogram?
#          - how does the ratio of females to males change?
#          - is there a point where the number of females
#            is the same as the number of males?
#
#   11. Now reshape the brush on the CASES histogram to narrow
#       its width to that of about one bar.
#
#       Again, brush the histogram bars from left to right
#       BUT now hold down the <SHIFT> key at the same time and
#       OBSERVE the effect on the SEX histogram.
#
#       Statistically,
#          - what is being observed in the SEX histogram?
#          - how does the ratio of females to males change?
#          - is there a point where the number of females
#            is the same as the number of males?
#
#
#       Turn off the brushing on the CASES histogram and
#       deselect all cases.
#
#       Use sweeping on the CASES histogram to show the
#       same information.
#
#
#
########
#
#  More histograms
#
#
#  Overseas travel
#
htravel <- with(
  covidNZ,
  l_hist(Overseas_travel,
         title = "NZ Covid Cases",
         xlabel = "Overseas Travel",
         linkingGroup = "CovidNZ")
)
# Questions:
#   Why does overseas travel have three bars?
#
#   What is the sex of the person(s) in the leftmost bar?
#
#   Had the earliest case travelled overseas?
#
#   Have more males than females travelled overseas?
#
#   Have more females travelled overseas than have not?
#
#   Are overseas travellers more likely to be one sex or the other?
#   What about non-travellers?
#
#  Age groups
#
hage <- with(
  covidNZ,
  l_hist(Age,
         title = "NZ Covid Cases",
         xlabel = "Age group",
         linkingGroup = "CovidNZ",
         swapAxes = TRUE)
)
# Questions:
#   Of females from age 15 to 29,
#   have more travelled overseas than have not?
#
#   What about those women 60 and over?  Same answer?
#
#   What about males under 30?
#
#   What about males appearing early in the outbreak? late?
#
# Questions:
#
#   Consider those early confirmed cases of either sex,
#   that is anyone in the leftmost 12 bars
#   (including those empty bins between bars as part of the bar count;
#    or up to and including the leftmost 8 bars of the histogram)
#   who are also from 40 to 59 years old.
#
#   Are these cases more or less likely to have travelled overseas?
#
#   What about only the females in this group?
#
#   What about only the females aged 50 to 59 in this group?
#
#   Is the same true for people under 30?
#
#
########
#
#
# We can accomplish the same thing with programmatic logic
#
# The cut-off is converted to a Date explicitly rather than relying on
# the comparison operator to coerce the character string.
earlyCases <- covidNZ$Case_date <= as.Date("2020-03-19")
forties <- (covidNZ$Age == "40 to 49")
fifties <- (covidNZ$Age == "50 to 59")
under30 <- covidNZ$Age %in% c("20 to 29", "15 to 19", "10 to 14",
                              "05 to 09", "01 to 04", "<1")
Female <- covidNZ$Sex == "Female"
Male <- covidNZ$Sex == "Male"

hcases["active"] <- earlyCases & (forties | fifties)
l_scaleto_world(htravel)
l_scaleto_world(hage)
l_scaleto_world(hcases)
l_scaleto_world(hsex)

hcases["active"] <- earlyCases & (forties | fifties) & Female
l_scaleto_world(htravel)
l_scaleto_world(hage)
l_scaleto_world(hcases)
l_scaleto_world(hsex)

########
#
#  SLIDES:  Interactive query logic
#
#  See also
l_web(page = "logicalQueries", directory = "articles", package = "loon")
#
#  Note discussion with respect to missing data and linking keys
#
########
#
#  Reactivating and rescaling everything
#
hcases["active"] <- TRUE
l_scaleto_world(htravel)
l_scaleto_world(hsex)
l_scaleto_world(hage)
l_scaleto_world(hcases)

########
#
#  FACETS
#     "facet: one part of a subject, situation, etc. that has many parts"
#                                     Cambridge English Dictionary
#
########
#
#  Logical separation can also be effected creating different facets
#
#  Existing plots can be facetted.
#
#  NOTE: if you have loon version = 1.3.5
#        first set the title to an empty string (to avoid plot() bug later)
#
if (packageVersion("loon") == "1.3.5") {
  hcases["title"] <- ""
}
#  If loon version >= 1.3.6, no need to set title.
#
#  E.g.
hcaseBySex <- l_facet(hcases, by = covidNZ$Sex)
#  You will need to make the window larger.
#
#  By default, the facets participate in the same linkingGroup
#  as the original.
#
#  Note that plotting these does not work as well as it should
#
plot(hcaseBySex)
#  The value of the "by" argument can be any vector having the
#  same length as hcases["x"].
#
#  The unique values in "by" determine the facets.
#
#
#  Facets can be separate as well.
#
l_facet(hcases, by = covidNZ$Overseas_travel, layout = "separate")
#
#  They can also be created when the histogram is created
#  originally using the "by" argument there.
#  AND depend on a LIST of "by" variables.
under40 <- under30 | (covidNZ$Age == "30 to 39")
hcaseByAge40Sex <- l_hist(covidNZ$Case_date,
                          by = list(under40 = under40,
                                    sex = covidNZ$Sex),
                          linkingGroup = "CovidNZ")
#  Note that the linkingGroup is now specified to join the others.
#
plot(hcaseByAge40Sex)
#  ... still not quite good enough (loon 1.3.0)
#
#  TRY:
#     Scrolling and panning on one of the histograms in
#     the facetted display hcaseByAge40Sex
#
#
#  CLOSE:
#     the windows containing facets
#     (just to save screen space)
#
#
#
#
#  IN CASE you accidentally closed any (non-facetted) display
#  You can reconstruct any of the histograms
#  by uncommenting (in RStudio ...
#  select and Cmd-Shift-C on a Mac)
#  and executing the corresponding
#  code below:
#
# hcases <- l_hist(covidNZ$Case_date,
#                  binwidth = 2, yshows = "density",
#                  xlabel = "Case date",
#                  title = "New Zealand Covid 19 cases",
#                  linkingGroup = "CovidNZ")
#
# hsex <- l_hist(covidNZ$Sex,
#                title = "NZ Covid Cases", xlabel = "Sex",
#                linkingGroup = "CovidNZ")
#
# htravel <- with(covidNZ,
#                 l_hist(Overseas_travel,
#                        title = "NZ Covid Cases",
#                        xlabel = "Overseas Travel",
#                        linkingGroup = "CovidNZ"))
# hage <- with(covidNZ,
#              l_hist(Age,
#                     title = "NZ Covid Cases",
#                     xlabel = "Age group",
#                     linkingGroup = "CovidNZ",
#                     swapAxes = TRUE))
#
#
#
#
#
########
#
#  LAYERS
#
########
#
#  TRY:
#    Focus the inspector on the CASES histogram
#
#
#
#    Select the LAYERS TAB
#       - click on the "eyeball" with a stroke through it
#       - click on the "eyeball" with NO stroke
#
#    Click on the AGE GROUPS histogram window
#       - how has the inspector changed?
#       - click on the down arrow.  What happened?
#       - click on the up arrow.  What happened?
#
#    What are these "layers"?
#
#
########
#
#  We can ADD NEW LAYERS
#
#  The histogram hcases represents a "density"
#  (at least when "density" is checked)
#
#  R lets us estimate one from the data
#
dens <- density(hcases["x"])
l_layer(hcases, dens,
        linewidth = 3,
        color = "grey50",
        dash = c(5, 5),
        label = "density")
#  Focus the INSPECTOR on the CASES histogram
#
#  In the inspector, look at the "id" of the "density" layer
#
#  It will be one of
l_layer_ids(hcases)
#
#  With the id (here "layer0") we can
#  manipulate layers programmatically too
#  E.g.
l_layer_lower(hcases, "layer0")
l_layer_raise(hcases, "layer0")
l_layer_hide(hcases, "layer0")
l_layer_show(hcases, "layer0")
#  Even the interactive "model" layer.
#  E.g.
l_layer_hide(hcases, "model")
l_layer_show(hcases, "model")
#
#  Could also have made the density solid
#  by treating its x and y values as polygon coordinates
#
dens_poly <- l_layer_polygon(hcases,
                             x = dens$x,
                             y = dens$y,
                             linewidth = 0.5,
                             color = "white",
                             linecolor = "white",
                             label = "poly density")
#
#  which can be referred to now by its R variable
l_layer_lower(hcases, dens_poly)
l_layer_hide(hcases, dens_poly)
l_layer_show(hcases, dens_poly)
#  TRY:
#    1. Arrange
#         the polygon layer to be at the bottom
#         the model layer next, and
#         the dashed density on top.
plot(hcases)
#    2. Return the "Analysis" tab
#       to the top of the inspector
#
#
#
#
#  NOTE:  There are lots of layering functions
#         See
help("l_layer")
#
########

########
#
#  Back to the ANALYSIS
#
#
#  covidNZ also has some information on flights.
#
#  E.g. "Flight_departure_date", "Arrival_date", and "Flight_number"
#
#  These might be related.
#
#
########
#
#  Scatterplots in loon ...  l_plot(x, y, ...)
#
########
#
#
#
#  E.g.
#
#  Arrival_date should be related to Flight_departure_date
#
#  Moreover, the plot points are really identified by
#  the Flight_number and perhaps the "Last_country_visited"
#  (if known).
#
#  Like histograms, l_plot() takes many arguments.
#  One, "itemLabel", can be used to identify the points.
#
#  Can build some labels from the available information
flight_label <- with(
  covidNZ,
  paste0("Flight ", Flight_number,
         "\n",  # Newline character
         "From: ", Last_country_visited)
)
ptraveltime <- l_plot(x = covidNZ$Flight_departure_date,
                      y = covidNZ$Arrival_date,
                      xlabel = "Departure date",
                      ylabel = "Arrival date",
                      title = "Overseas travellers",
                      itemLabel = flight_label,
                      linkingGroup = "CovidNZ")
#
#  NOTE:
#
#    1. The INSPECTOR changed when it focused on the scatterplot
#
#    2. In the console a warning message appeared.
#         - What was this about?
#         - Why did it happen?
#
#
#
#
#  TRY:
#
#  On the INSPECTOR
#  (focused on "Overseas travellers" scatterplot)
#
#    1. Check the "itemLabels" box
#
#    2.
#       Move the mouse over one of the points
#       (do not click; just hover)
#
#         - Which flight has the latest departure date?
#         - Which flight has the earliest?
#         - Which flight was longest?
#
#    3. Select all flights taken by people in their 50s.
#
#       Colour the selected points red
#
#    4. Select all the red points
#
#       In the SCATTERPLOT
#       (NOTE: clicking on a plot's title bar focuses on that
#        plot without changing the selection in its interior.)
#
#       NOTE(review): the key names in steps 4 and 5 were missing from
#       this text (they look like stripped <...> tokens).  They are
#       restored below as <CTRL> and <SHIFT>, which is an assumption --
#       confirm against the loon documentation.
#
#       Press and hold both the <CTRL> AND <SHIFT> keys.
#
#       With BOTH keys held down, click and hold
#       the LEFT mouse button on one of the selected points.
#
#       While still holding ALL THREE down move the selected
#       points away from the rest and release all three.
#
#    5. Move the point of the most delayed flight to
#       join the group of red points and colour it red.
#       (NOTE: the <SHIFT> key is NOT needed here ... why not?)
#
#    6. Select all the red points.
#
#       What happened in the AGE GROUP histogram?
#       Why?
#
#       Scale the scatterplot to these selected points.
#
#    7. Select all the red points.
#
#       Near the bottom of the inspector is a
#       collection of buttons beside the word "move"
#
#         - Press the button with only horizontal lines.
#           What if you press it again?
#           What does this button do?
#
#         - Press the button with only vertical lines.
#           What if you press it again?
#           What does this button do?
#
#         - Press the button with an array of dots
#           What if you press it again?
#           What does this button do?
#
#         - Press the button immediately to the right
#           of the dot array button.
#           What if you press it again?
#           What does this button do?
#
#         - Press the button at the far left
#           What if you press it again?
#           What does this button do?
#
#         - Press the button second from the far left
#           What if you press it again?
#           What does this button do?
#
#         - Press the button on the far right
#           What if you press it again?
#           What does this button do?
#
#    8. Scrolling and panning works the same as it did
#       with the histograms.
#       Scale the plot to fit ALL the points.
#
#    9.
#       At the bottom of the inspector are four buttons
#       beside the word "size" grouped in pairs designated
#       "abs" for "absolute" and "rel" for "relative"
#
#       + and - buttons increase and decrease size.
#
#       Select the RED points.
#         - Use either + button to increase the size of the
#           points.  Press the button a few times.
#
#         - Change the shape of the "glyph" of all the red
#           points.
#
#         - "Jitter" the RED points
#
#       Select ALL points.
#         - Use the "rel" size buttons to DECREASE the sizes
#           of all points.
#         - Use the "abs" size buttons to INCREASE the sizes
#           of all points.
#       What is the difference between "rel" and "abs"?
#
#
#
#  As can be seen the scatterplot produced by l_plot()
#  has much in common with the histograms produced by
#  l_hist()
#  E.g.
class(hcases)
class(ptraveltime)
#
#  Like l_hist, an l_plot has many properties that
#  can be accessed and changed programmatically.
#
#  Some are the same as l_hist (e.g. "color" or "linkingGroup")
#  and
#  some are different (e.g. "size" or "glyph")
#  See
names(ptraveltime)
#
#  E.g.
#  we could randomly change the glyphs and the sizes
#  of points in the scatterplot as follows
#
pointsizes <- seq_len(20)
pointcolors <- l_getColorList()[1:5]
pointglyphs <- l_primitiveGlyphs()
pointglyphs
pointglyphs <- pointglyphs[c(3, 6, 9, 12)]  # "closed" glyphs
nPoints <- ptraveltime["n"]
nPoints
ptraveltime["size"] <- sample(pointsizes, nPoints, replace = TRUE)
ptraveltime["color"] <- sample(pointcolors, nPoints, replace = TRUE)
ptraveltime["glyph"] <- sample(pointglyphs, nPoints, replace = TRUE)
#
#  Notice the effect on the histograms
#
#  And we can move them around
#
#  by jittering
l_move_jitter(ptraveltime, which = "active", amount = 2)
#  or wherever we want
xrange <- range(ptraveltime["x"])
ptraveltime["xTemp"] <- runif(nPoints,
                              min = min(xrange),
                              max = max(xrange))
yrange <- range(ptraveltime["y"])
ptraveltime["yTemp"] <- runif(nPoints,
                              min = min(yrange),
                              max = max(yrange))
plot(ptraveltime)
#
#  And move them back
#
l_move_reset(ptraveltime, which = "active")
plot(ptraveltime)

########
#
#  A second scatterplot
#
#  What about measuring how long it took to be
#  a confirmed case AFTER ARRIVAL from overseas?
#
time2case <- covidNZ$Case_date - covidNZ$Arrival_date
#
#  Might there be a relationship between "time2case" and
#  the "Case_date"?
#
ptime2case <- l_plot(x = covidNZ$Arrival_date,
                     y = time2case,
                     xlabel = "Arrival date",
                     ylabel = "Time to case",
                     title = "Time to case since arrival",
                     itemLabel = flight_label,
                     showItemLabels = TRUE,
                     linkingGroup = "CovidNZ")
#
#  QUESTION:
#    What features of the points are the same in the two
#    scatterplots?
#    What features are different?
#
l_getLinkedStates(ptraveltime)
l_getLinkedStates(ptime2case)
l_setLinkedStates(ptraveltime,
                  states = c(l_getLinkedStates(ptraveltime), "glyph"))
l_setLinkedStates(ptime2case,
                  states = c(l_getLinkedStates(ptime2case), "glyph"))
#  Hereafter the primitive glyph will be linked as well.
ptraveltime["glyph"] <- sample(pointglyphs, nPoints, replace = TRUE)
#  glyph is NOT shared by default.
#  (Note: both plots must have the same glyphs available.)
#
#
#  TRY:
#
#  In the "Overseas travellers" plot
#
#    1. Select all points and make them all
#         - closed circles
#         - the same size
#         - the colour pink
#
#       What has happened in the "Time to case" plot?
#
#
#  PREP:
#    ON YOUR SCREEN, if possible, make all of the
#    following visible at once:
#      - the CASES histogram
#      - the AGE histogram
#      - the SEX histogram (could make this smaller)
#      - the OVERSEAS travel histogram (could make this smaller)
#      - the "Time to case plot"
#      - the INSPECTOR
#    This might take some tidying up.
#
#    THEN: Turn on brushing in the
#          "Time to case" scatterplot
#
#  In the "Time to case" scatterplot
#
#    2. Move the (now square) brush around in the
#       scatterplot and observe the effect
#       in all histograms.
#
#       The highlighted part of each histogram shows
#       a conditional distribution.
#
#       Question:  What is being conditioned on?
#
#       Ans: the values of at least three variables
#            (non NA being a fourth)
#
#       RESHAPE the brush to be tall and thin and brush again.
#         - Now what is being conditioned on?
#
#       TURN OFF the brush in the scatterplot
#
#
#  In the "Time to case" scatterplot (no brush)
#
#    3. Select all pink points
#         - invert the selection
#         - color the selected points light blue
#
#       Deselect all points.
#
#       Explain the existence of the "light blue" points?
#       And their absence from the "Overseas travellers" plot
#       (Hint: data missing on ANY variable used to build
#        the plot will cause that case to NOT appear)
#
#    4.
#
#    5. Look at the case date histogram (hcases).
#         - colour all red cases to light yellow.
#         - compare distributions by
#           selecting one colour after another.
#         - deselect all colours
#
plot(hcases)
#
#  or
hcasesByColor <- l_facet(hcases,
                         by = "color",
                         layout = "wrap",
                         ncol = 1)
#  (Note that "color" is an "n-dimensional state" of hcases.)
#
#  Move the model layer up by hand in each plot, and rescale
#  OR programmatically as
# (the loop variable is not called `hist`, to avoid masking graphics::hist)
for (facet_hist in hcasesByColor) {
  l_layer_raise(facet_hist, "model")
  l_scaleto_world(facet_hist)
}
#
#  It should look like this now
plot(hcasesByColor)
#  Question:  How would you describe the apparent pattern
#             by colour?
#
#  Look at the Overseas Travel HISTOGRAM
#    Are all yellows overseas travellers?
#
#  Look at the Age Group HISTOGRAM
#    See any real differences by colour?
#
#
########
#
#  LAYERS again
#
########
#
#  We could add a smooth (loess fitted) curve to the
#  scatterplot ptime2case of time to case versus arrival
lsmooth <- l_layer_smooth(ptime2case,
                          linecolor = "black",
                          linewidth = 5,
                          linedash = c(10, 2))
#  Can hide that layer
#
l_layer_hide(ptime2case, lsmooth)
#  And show it again
#
l_layer_show(ptime2case, lsmooth)
#
#  What happened?
#
#    1. With the inspector focussed on the scatterplot
#
#       Open the layers tab
#
#       What do you see?
#
#    2. Arrange so that the only layers that are visible are
#         - the model layer
#         - the layer containing the confidence region for the
#           smooth.
#
plot(ptime2case)
#
########
#
#  NOTE
#    There are LOTS of built in layers that are useful
#      - maps from various libraries
#        (e.g. "maps", "maptools", "sp" package, ...)
#      - maps from external sources (e.g. "shp" files)
#      - polygons, lines, text, etc.
#      - contour lines
#      - raster images
#    See
help("l_layer")
#
########
#
#
#
#  One last histogram, this time of the last country visited.
hCountries <- l_hist(as.character(covidNZ$Last_country_visited),
                     showFactors = TRUE,  # Generally not shown for
                                          # 25 or more levels
                     swapAxes = TRUE,
                     title = "country last visited",
                     xlabel = "country",
                     linkingGroup = "CovidNZ")
#
#    1. What country contains the most yellow?
#         - what does yellow mean again?
#
#    2. Deactivate those cases which had NO overseas travel
#         - rescale the last country visited plot
#
#    3. China appears to be missing.  Why?
#       Note: the earliest dates are
# (na.rm = TRUE on all three so a missing date cannot turn the answer into NA)
min(covidNZ$Case_date, na.rm = TRUE)
min(covidNZ$Flight_departure_date, na.rm = TRUE)
min(covidNZ$Arrival_date, na.rm = TRUE)
#
plot(hCountries)
#
################################################################################