E Base R vs. Tidyverse

library(tidyverse)

We have worked primarily with the tidyverse tools in this course. While the main advantages of the tidyverse tools for data wrangling are their simplicity and human readability, there is actually nothing the tidyverse does that cannot be similarly achieved with base R commands. If you are interested in comparing and contrasting, check out the examples below.

Most of these use the iris dataset (the grouped summaries in E.5 use mtcars); both datasets come with base R (just type iris or mtcars).

E.1 Extract variables

# A single variable, returned as a plain vector
iris[["Sepal.Length"]]
# Several variables picked by name (the result is still a data.frame)
iris[c("Species", "Petal.Width")]
# The same variables picked by column position
iris[c(5, 4)]

# select() always returns a data frame, even for a single column;
# pull(iris, Sepal.Length) gives a plain vector like iris$Sepal.Length
select(iris, Sepal.Length) # single variable
select(iris, Species, Petal.Width) # by name
select(iris, 5, 4)  # by column index

E.2 Make new variables

# Add two derived columns to iris itself: the length-to-width ratio
# of the petals and of the sepals
iris$Petal.Ratio <- with(iris, Petal.Length / Petal.Width)
iris$Sepal.Ratio <- with(iris, Sepal.Length / Sepal.Width)

# Both ratio columns are created in one mutate() call; the result is
# returned rather than assigned, so iris is left as it was
iris %>%
  mutate(
    Petal.Ratio = Petal.Length / Petal.Width,
    Sepal.Ratio = Sepal.Length / Sepal.Width
  )

Extract observations (rows)

# Using [,] with a logical row index evaluated inside iris
iris[with(iris, Petal.Width > 0.5 & Species == "setosa"), ]

# Using subset (works very much like dplyr::filter)
subset(iris, subset = Species == "setosa" & Petal.Width > 0.5)

	Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species
44	5	3.5	1.6	0.6	setosa

	Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species
44	5	3.5	1.6	0.6	setosa

# Comma-separated conditions in filter() are combined with AND
iris %>%
  filter(Petal.Width > 0.5, Species == "setosa")

Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species
5	3.5	1.6	0.6	setosa

E.3 Arrange observations (rows)

# descending order of species (alphabetic) followed by ascending order of Petal.Width
# Note: rev() only reverses the positions of the elements, it does not flip
# the sort direction. Negating the factor's integer codes (via xtfrm) sorts
# Species in descending order whatever the current row order is.
iris[order(-xtfrm(iris$Species), iris$Petal.Width), ]

# Species in descending (reverse alphabetic) order first,
# then Petal.Width from smallest to largest within each species
iris %>%
  arrange(desc(Species), Petal.Width)

E.4 Summarise observations (rows)

# Build the one-row summary table by hand; with() lets the iris columns
# be referenced by their bare names
with(iris, data.frame(
  Petal.Length.mean = mean(Petal.Length),
  Petal.Length.sd = sd(Petal.Length),
  Sepal.Length.mean = mean(Sepal.Length),
  Sepal.Length.sd = sd(Sepal.Length)
))

Petal.Length.mean	Petal.Length.sd	Sepal.Length.mean	Sepal.Length.sd
3.758	1.765298	5.843333	0.8280661

# Collapse iris to a single row holding the mean and standard deviation
# of Petal.Length and Sepal.Length
iris %>%
  summarise(
    Petal.Length.mean = mean(Petal.Length),
    Petal.Length.sd = sd(Petal.Length),
    Sepal.Length.mean = mean(Sepal.Length),
    Sepal.Length.sd = sd(Sepal.Length)
  )

Petal.Length.mean	Petal.Length.sd	Sepal.Length.mean	Sepal.Length.sd
3.758	1.765298	5.843333	0.8280661

E.5 Grouped operations

# Split-apply: by() passes the rows of each cyl/gear group to FUN, which
# returns a one-row data.frame of summary statistics for that group
mtcars_by <- by(
  mtcars,
  INDICES = list(mtcars$cyl, mtcars$gear),
  FUN = function(grp) {
    with(grp, data.frame(
      cyl = unique(cyl),
      gear = unique(gear),
      mpg.mean = mean(mpg),
      mpg.sd = sd(mpg),
      wt.mean = mean(wt),
      wt.sd = sd(wt)
    ))
  }
)

# Combine: stack the per-group rows into a single data.frame
do.call(rbind, mtcars_by)

cyl	gear	mpg.mean	mpg.sd	wt.mean	wt.sd
4	3	21.500	NA	2.465000	NA
6	3	19.750	2.3334524	3.337500	0.1732412
8	3	15.050	2.7743959	4.104083	0.7683069
4	4	26.925	4.8073604	2.378125	0.6006243
6	4	19.750	1.5524175	3.093750	0.4131460
4	5	28.200	3.1112698	1.826500	0.4433560
6	5	19.700	NA	2.770000	NA
8	5	15.400	0.5656854	3.370000	0.2828427

# Mean and standard deviation of mpg and wt for every cyl/gear group.
# `.groups = "drop"` returns an ungrouped result, so no grouping is carried
# into downstream analysis, and it avoids the message summarise() prints
# when it would otherwise leave the result grouped by cyl.
mtcars %>% 
  group_by(cyl, gear) %>% 
  summarise(mpg.mean = mean(mpg),
            mpg.sd = sd(mpg),
            wt.mean = mean(wt),
            wt.sd = sd(wt),
            .groups = "drop")

cyl	gear	mpg.mean	mpg.sd	wt.mean	wt.sd
4	3	21.500	NA	2.465000	NA
4	4	26.925	4.8073604	2.378125	0.6006243
4	5	28.200	3.1112698	1.826500	0.4433560
6	3	19.750	2.3334524	3.337500	0.1732412
6	4	19.750	1.5524175	3.093750	0.4131460
6	5	19.700	NA	2.770000	NA
8	3	15.050	2.7743959	4.104083	0.7683069
8	5	15.400	0.5656854	3.370000	0.2828427

E.6 Create new columns as calculations

# Split-apply: within each species, append Petal.Width expressed as a
# deviation from that species' mean Petal.Width
iris_by <- by(
  iris,
  INDICES = iris$Species,
  FUN = function(grp) {
    species_mean <- mean(grp$Petal.Width)
    grp$Petal.Width.centered <- grp$Petal.Width - species_mean
    grp
  }
)

# Combine: stack the per-species pieces back into one data.frame
do.call(rbind, iris_by)

# Center Petal.Width on its species mean; ungroup() at the end removes
# the grouping so it does not affect downstream analysis
iris %>%
  group_by(Species) %>%
  mutate(
    Petal.Width.centered = Petal.Width - mean(Petal.Width)
  ) %>%
  ungroup()

E.7 Filter rows with conditions evaluated by group

# Split-apply: for each species keep the rows whose Petal.Width equals
# that species' maximum (tied rows are all kept)
widest_petals <- by(
  iris,
  INDICES = iris$Species,
  FUN = function(grp) {
    is_widest <- grp$Petal.Width == max(grp$Petal.Width)
    grp[is_widest, ]
  }
)

# Combine: stack the per-species results into one data.frame
do.call(rbind, widest_petals)

	Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species	Petal.Ratio	Sepal.Ratio
setosa	5.0	3.5	1.6	0.6	setosa	2.666667	1.428571
versicolor	5.9	3.2	4.8	1.8	versicolor	2.666667	1.843750
virginica.101	6.3	3.3	6.0	2.5	virginica	2.400000	1.909091
virginica.110	7.2	3.6	6.1	2.5	virginica	2.440000	2.000000
virginica.145	6.7	3.3	5.7	2.5	virginica	2.280000	2.030303

# Keep, within each species, the rows whose Petal.Width equals that
# species' maximum (tied rows are all kept)
iris %>% 
  group_by(Species) %>% 
  filter(Petal.Width == max(Petal.Width)) %>% 
  ungroup() # remove any groupings from downstream analysis

Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species	Petal.Ratio	Sepal.Ratio
5.0	3.5	1.6	0.6	setosa	2.666667	1.428571
5.9	3.2	4.8	1.8	versicolor	2.666667	1.843750
6.3	3.3	6.0	2.5	virginica	2.400000	1.909091
7.2	3.6	6.1	2.5	virginica	2.440000	2.000000
6.7	3.3	5.7	2.5	virginica	2.280000	2.030303

E.8 Pivot data

# Stack the four measurement columns into long format.
# `times` supplies the values written to the `trait` column; without it
# reshape() labels the stacked columns 1, 2, 3, 4 instead of using the
# original column names. `idvar = "id"` names the row identifier column,
# which reshape() creates because iris has no such column.
reshape(iris, 
        varying = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"),
        v.names = "measurement",
        timevar = "trait",
        times = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"),
        idvar = "id",
        direction = "long")

# Gather the four measurement columns into name/value pairs:
# the column names go to `trait`, the cell values to `measurement`
iris %>%
  pivot_longer(
    cols = Sepal.Length:Petal.Width,
    names_to = "trait",
    values_to = "measurement"
  )

D Symbols

License