Publications by Daniel Lee
Document
# Load packages # Core library(tidyverse) library(lubridate) library(nycflights13) CH20 Vectors Introduction Vector Basics Important types of atomic vectors Using atomic vectors sample(10) + 10 ## [1] 16 15 20 18 11 13 12 17 14 19 1:10 + 1:2 ## [1] 2 4 4 6 6 8 8 10 10 12 1:10 + 1:3 ## [1] 2 4 6 5 7 9 8 10 12 11 data.frame(a...
274 sym R (2126 sym/52 pcs)
Document
library(tidyverse) ## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ── ## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5 ## ✔ tibble 3.1.8 ✔ dplyr 1.0.10 ## ✔ tidyr 1.2.1 ✔ stringr 1.4.1 ## ✔ readr 2.1.2 ...
352 sym R (2073 sym/9 pcs) 2 img
Document
Import your data data("mtcars") mtcars <- as_tibble(mtcars) data <- read_excel("../00_data/myData.xlsx") ## New names: ## • `` -> `...1` data ## # A tibble: 4,810 × 24 ## ...1 rank position hand player years total…¹ status yr_st…² season age ## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> ##...
372 sym Python (11308 sym/41 pcs) 1 img
Apply 5
Import data # excel file Chocolate <- read_excel("data/Chocolate.xlsx") Chocolate ## # A tibble: 191 × 10 ## https://www…¹ compa…² compa…³ revie…⁴ count…⁵ speci…⁶ cocoa…⁷ ingre…⁸ most_…⁹ ## <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> ## 1 2454 5150 U.S.A. 2...
83 sym Python (4302 sym/6 pcs)
Code Analog
Import Data flights ## # A tibble: 336,776 × 19 ## year month day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier ## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> ## 1 2013 1 1 517 515 2 830 819 11 UA ## 2 2013 1 1 5...
154 sym Python (17180 sym/36 pcs)
Document
Introduction Questions Variation Visualizing Distributions diamonds %>% ggplot(aes( x = cut)) + geom_bar() diamonds %>% ggplot(mapping = aes(x = carat)) + geom_histogram(binwidth = 0.5) diamonds %>% filter(carat < 3) %>% ggplot(aes(x = carat)) + geom_histogram(bindwidth = 0.5) ## Warning: Ignoring unknown param...
289 sym R (1968 sym/21 pcs) 15 img
Code Analog 6
##Introduction ##Questions What type of variation occurs within my variables? What type of covariation occurs between my variables? Variation Visualizing distributions ggplot(data = diamonds) + geom_bar(mapping = aes(x = cut)) diamonds %>% count(cut) ## # A tibble: 5 × 2 ## cut n ## <ord> <int> ## 1 Fair 1610 ## 2 ...
210 sym Python (1384 sym/12 pcs) 6 img
Document
Import Data data <- read_excel("../00_data/myData.xlsx") ## New names: ## • `` -> `...1` data ## # A tibble: 4,810 × 24 ## ...1 rank position hand player years total…¹ status yr_st…² season age ## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> ## 1 1 1 C Left Wayne G… 1979…...
421 sym R (2306 sym/14 pcs) 8 img
Revised Apply 6
Import Data # excel file Chocolate <- read_excel("../00_data/Chocolate.xlsx") Chocolate ## # A tibble: 191 × 10 ## ref compan…¹ compa…² revie…³ count…⁴ speci…⁵ cocoa…⁶ ingre…⁷ most_…⁸ rating ## <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> ## 1 2454 5150 U.S.A. 2019 T...
223 sym 8 img
Code Analog 7
Tidy Data table1 %>% mutate(rate = cases / population * 10000) ## # A tibble: 6 × 5 ## country year cases population rate ## <chr> <int> <int> <int> <dbl> ## 1 Afghanistan 1999 745 19987071 0.373 ## 2 Afghanistan 2000 2666 20595360 1.29 ## 3 Brazil 1999 37737 172006362 2.19 ## 4 Brazil 2000 8...
133 sym R (3178 sym/20 pcs) 1 img