(<- class-heights ;; sex (0 - male; 1 - female) height (inches) last-digit of id (flatten (loop for i from 1 to 20 collect (let* ((sex (random-discrete-uniform :from 0 :to 1)) (height (eref (random-gaussian :location (if (zerop sex) 70 64) :scale 3) 0) ) (id (random-discrete-uniform :from 0 :to 9)) ) (list (if (zerop sex) ;; using strings here rather than numbers 0 and 1 "Male" "Female") height id) ) ) ) ) ;;; Here is where we might collect data rather than generate it. ;;; It is presently commented out by the enclosing #| ... |# #| (<- class-heights ;; sex (0 - male; 1 - female) height (inches) last-digit of id '("Male" 74 9 "Male" 68 8 "Male" 73 9 "Female" 64 7 "Female" 68 5 "Female" 72 2 "Male" 68 5 "Female" 64 9 "Female" 64 6 "Male" 68 4 "Male" 72 9 "Female" 61 3 "Male" 66 8 "Male" 67 8 "Male" 71 9 "Female" 63 6 "Male" 70 7 "Male" 70 4 "Male" 72 2 "Male" 75 3 "Male" 66 3 "Male" 70 1 ) ) |# (<- class-vars '(sex height-in-inches last-id-digit) ) (<- class-heights (array class-heights :dimensions (list (/ (length class-heights) (length class-vars)) (length class-vars)))) (<- class-heights (dataset class-heights :variates class-vars :name "STAT441/841 height data")) ;; some plots ;; (histogram :data class-heights :var 'height-in-inches :title "Heights in inches" :color *green-color* :fill? T :link? T :nbins 8) ;; just look at the cases batched together by sex (batch-display-list :data class-heights :scrollable? NIL :link? T :by 'sex :draw? T) ;; set up a density model for each sex ;; ;; (<- men (array (loop for i from 0 to (- (nrows class-heights) 1) when ;; it's a man (or (and (numberp (eref class-heights i 0)) (= (eref class-heights i 0) 0)) (string-equal "Male" (eref class-heights i 0))) ;; this when is more complicated than it needs to be just ;; so that both cases of using 0 and 1 or "Male" and "Female" ;; will work without any change to the rest of the code. collect (eref class-heights i 1)) ) ) (<- women (array (loop for i from 0 to (- (nrows class-heights) 1) when ;; it's a woman (or (and (numberp (eref class-heights i 0)) (= (eref class-heights i 0) 0)) (string-equal "Female" (eref class-heights i 0))) ;; as with men, ;; this when is more complicated than it needs to be just ;; so that both cases of using 0 and 1 or "Male" and "Female" ;; will work without any change to the rest of the code. collect (eref class-heights i 1)) )) (<- hm (histogram :data men :var 0 :fill? T :color *light-blue-color* :title "Men's height" :bottom-label "height" :histogram-scale :density)) (<- hw (histogram :data women :var 0 :fill? T :color *gray-color* :title "Women's height" :bottom-label "height" :histogram-scale :density)) (<- gw (function-plot :domain '(55 80) :nlines 100 :function (function (lambda (x) (density-gaussian x :location (mean women) :scale (sd women)))) :draw? T :width 4 :left-label "density" :bottom-label "Height in inches" :title "Two gaussians, modelling heights; women red, men blue" :color *red-color*)) (set-tics (left-view-of gw) '(0 0.1 0.2)) (<- gm (function-view :domain '(55 80) :nlines 100 :function (function (lambda (x) (density-gaussian x :location (mean men) :scale (sd men)))) :draw? NIL :width 4 :color *blue-color*)) (layer-view (interior-view-of gw) gm ) ;;; Now let's layer these on top of the corresponding histograms to see how ;;; well these fit. (set-tics (left-view-of hw) '(0 0.1 0.2)) (set-tics (bottom-view-of hw) '(55 60 65 70 75 80)) (layer-view (interior-view-of hw) (interior-view-of gw) ) (set-tics (left-view-of hm) '(0 0.1 0.2)) (set-tics (bottom-view-of hm) '(55 60 65 70 75 80)) (layer-view (interior-view-of hm) gm ) ;;; Now put together as a mixture (<- prop-female (/ (number-of-elements women) (nrows class-heights))) (<- gwm (function-view :domain '(56 80) :nlines 100 :function (function (lambda (x) (+ (* prop-female (density-gaussian x :location (mean women) :scale (sd women))) (* (- 1 prop-female) (density-gaussian x :location (mean men) :scale (sd men)))) ) ) :draw? NIL :width 4 :color *green-color*)) (layer-view (interior-view-of gw) gwm ) (layer-view gm gwm ) ;;; ;;; Some examples of mixtures of two gaussians ;;; (defun draw-gaussian-mix (&key (location1 10) (scale1 1) (location2 15) (scale2 2) (mix-prob 0.5)) (let (g1 g2 gm (xmin (min (- location1 (* 3 scale1)) (- location2 (* 3 scale2)))) (xmax (max (+ location1 (* 3 scale1)) (+ location2 (* 3 scale2)))) (max-height (max (density-gaussian location1 :location location1 :scale scale1) (density-gaussian location2 :location location2 :scale scale2)) ) ) (<- g1 (function-plot :domain (list xmin xmax) :nlines 100 :function (function (lambda (x) (density-gaussian x :location location1 :scale scale1))) :draw? T :width 4 :left-label "density" :bottom-label "X" :title (format NIL "Mixing two gaussians: ~3,2F G(~a, ~a) + ~3,2F G(~a, ~a)" mix-prob location1 scale1 (- 1 mix-prob) location2 scale2) :color *red-color*)) (set-tics (left-view-of g1) (list 0 0.1 (* .1 (round max-height .1)))) (<- g2 (function-view :domain (list xmin xmax) :nlines 100 :function (function (lambda (x) (density-gaussian x :location location2 :scale scale2))) :draw? NIL :width 4 :color *blue-color*) ) (layer-view (interior-view-of g1) g2 ) (<- gm (function-view :domain (list xmin xmax) :nlines 100 :function (function (lambda (x) (+ (* mix-prob (density-gaussian x :location location1 :scale scale1)) (* (- 1 mix-prob) (density-gaussian x :location location2 :scale scale2))) ) ) :draw? NIL :width 4 :color *green-color*)) (layer-view (interior-view-of g1) gm ) ) ) ;;; try some (draw-gaussian-mix) (draw-gaussian-mix :mix-prob 0.1) (draw-gaussian-mix :mix-prob 0.7) (draw-gaussian-mix :mix-prob 0.9) (draw-gaussian-mix :location1 12) (draw-gaussian-mix :location1 12 :location2 13) (draw-gaussian-mix :location1 12 :location2 20 :mix-prob 1/3) ;;; Exploring some of the other features and dimensions of the data ;;; (dot-plot :data class-heights :var 'last-id-digit :size 12 :link? T :title "Random digit") ;; display the cases in a list (case-display-list :data class-heights :scrollable? T :link? T :draw? T) (<- sp (scatterplot :data class-heights :x 'last-id-digit :y 'height-in-inches :link? T))