03 – Selection Syntax

Sebastian Raschka

Source file: https://github.com/rasbt/R-notes/blob/master/03-selection-syntax.Rmd

Selection Syntax

Selecting Elements from Vectors and Matrices (Single Bracket Indexing)

vec_a <- c("x", "y", "z")

vec_a[1] # return a vector containing the first element

## [1] "x"

vec_a[1:2] # return a vector containing the first two elements

## [1] "x" "y"

vec_b <- 1:9 # `:` already builds the integer vector; no c() wrapper needed
vec_b

## [1] 1 2 3 4 5 6 7 8 9

# A logical vector inside the brackets acts as a mask: only the elements
# for which the condition is TRUE are returned.
vec_b[vec_b >= 5]

## [1] 5 6 7 8 9

# Conditions are combined element-wise with `&`.
vec_b[(vec_b > 5) & (vec_b < 8)]

## [1] 6 7

How about Matrices?

# 3x2 matrix; the values 1:6 are filled in column by column
my_matrix <- matrix(1:6, nrow = 3, ncol = 2)
my_matrix

##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
my_matrix[2, 1] # single element: row 2, column 1

## [1] 2
my_matrix[2, ] # all of row 2, returned as a vector

## [1] 2 5
my_matrix[, 2] # all of column 2, returned as a vector

## [1] 4 5 6
my_matrix_with_na <- my_matrix # make a copy

my_matrix_with_na[1, 2] <- NA # overwrite element with NA value
my_matrix_with_na

##      [,1] [,2]
## [1,]    1   NA
## [2,]    2    5
## [3,]    3    6
mask <- is.na(my_matrix_with_na) # logical matrix: TRUE where a value is NA
mask

##       [,1]  [,2]
## [1,] FALSE  TRUE
## [2,] FALSE FALSE
## [3,] FALSE FALSE
my_matrix_with_na[mask] # the elements where the mask is TRUE

## [1] NA
!mask # inverted mask 

##      [,1]  [,2]
## [1,] TRUE FALSE
## [2,] TRUE  TRUE
## [3,] TRUE  TRUE

# The non-NA elements come back as a vector, read column by column.
my_matrix_with_na[!mask]

## [1] 1 2 3 5 6

Selecting from Data Frames

# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
df <- data.frame(
  MyIntegerVar = 1:4,
  MyCharVar = c("A", "B", "A", "B"),
  MyBoolVar = c(TRUE, FALSE, TRUE, TRUE)
)
df

##   MyIntegerVar MyCharVar MyBoolVar
## 1            1         A      TRUE
## 2            2         B     FALSE
## 3            3         A      TRUE
## 4            4         B      TRUE
df[3, ] # row 3 with all columns; the result is still a data frame

##   MyIntegerVar MyCharVar MyBoolVar
## 3            3         A      TRUE
df[, 3] # returns a vector

## [1]  TRUE FALSE  TRUE  TRUE

# or, with a single index and no comma, select the column but keep the
# data frame structure

df[3] # returns a data frame with 1 column

##   MyBoolVar
## 1      TRUE
## 2     FALSE
## 3      TRUE
## 4      TRUE

Different Ways for Selecting Columns from a Data Frame

df["MyBoolVar"] # select by name with `[`: returns a data frame with 1 column

##   MyBoolVar
## 1      TRUE
## 2     FALSE
## 3      TRUE
## 4      TRUE
df$MyBoolVar # select by name with `$`: returns the column as a vector

## [1]  TRUE FALSE  TRUE  TRUE

Selecting Elements from a Data Frame

df[2, 1] # row 2, column 1 (column given by position)

## [1] 2
df[2, "MyIntegerVar"] # row 2, column given by name

## [1] 2

Or, equivalently, extract the column first and then index into it:

# `$` extracts the MyIntegerVar column; `[2]` then picks its second element.
df$MyIntegerVar[2]

## [1] 2

Conditional Selection

# Keep the rows where MyIntegerVar is greater than 1 and MyBoolVar is TRUE.
# `$` extracts each column as a plain vector, so the row mask is a logical
# vector; a logical column can be used directly, without `== TRUE`.
df[df$MyIntegerVar > 1 & df$MyBoolVar, ]

##   MyIntegerVar MyCharVar MyBoolVar
## 3            3         A      TRUE
## 4            4         B      TRUE