02 – Data Types

Sebastian Raschka

Source file: https://github.com/rasbt/R-notes/blob/master/02-datatypes.Rmd

R Resources and Help

Executing code

[Image: R notes screenshot]

One thing to notice about R, coming from Python or other programming languages, is the somewhat unusual assignment operator <-. We use the assignment operator to assign the value of an expression (right side of the assignment operator) to a variable (left side of the assignment operator). For example:

my_variable <- 3+5
my_variable

## [1] 8

Data Types

Floats (real numbers)

typeof(1)

## [1] "double"

class(1)

## [1] "numeric"

typeof(1.0)

## [1] "double"

class(1.0)

## [1] "numeric"

Integers

my_int <- 1L
my_int

## [1] 1

typeof(my_int)

## [1] "integer"

class(my_int)

## [1] "integer"
my_float <- as.double(my_int)
typeof(my_float)

## [1] "double"

my_int <- as.integer(my_float)
typeof(my_int)

## [1] "integer"

Boolean

Data Structures

Vectors and Sequences

x <- vector(mode = "double", length = 3)
x

## [1] 0 0 0
x[1] <- 0.1
x[2] <- 0.2
x[3] <- 0.3
x

## [1] 0.1 0.2 0.3
x <- c(0.1, 0.2, 0.3)
x

## [1] 0.1 0.2 0.3

typeof(x)

## [1] "double"
x <- c(1L, 0.2, 0.3, 1L, 10L)
x

## [1]  1.0  0.2  0.3  1.0 10.0

class(x)

## [1] "numeric"

typeof(x)

## [1] "double"
x <- c(5, "a")
x

## [1] "5" "a"

class(x)

## [1] "character"

typeof(x)

## [1] "character"
x <- c(2, TRUE, FALSE)
x

## [1] 2 1 0

class(x)

## [1] "numeric"

typeof(x)

## [1] "double"

Sequence vectors

1:10

##  [1]  1  2  3  4  5  6  7  8  9 10

Dot product

x <- c(0.1, 0.2, 0.3)
x * x

## [1] 0.01 0.04 0.09
x %*% x

##      [,1]
## [1,] 0.14

Strings / Character Vectors

# paste0() concatenates its arguments with no separator; it is the idiomatic
# shorthand for paste(..., sep = "").
paste0("abc", "def")

## [1] "abcdef"
sprintf("%d + %d = %d", 1, 2, 3)

## [1] "1 + 2 = 3"
%s  a string
%d  an integer
%0xd    an integer zero-padded on the left to a total width of x characters (e.g., %05d formats 42 as 00042)
%f  decimal notation with six decimals
%.xf    floating point number with x digits after decimal point
%e  compact scientific notation, e in the exponent
%E  compact scientific notation, E in the exponent
%g  compact decimal or scientific notation (with e)

Missing Values

vector_with_na <- c(NA, 0.1, 0.5)
is.na(vector_with_na)

## [1]  TRUE FALSE FALSE

vector_with_nan <- c(NaN, 0.1, 0.5)
is.nan(vector_with_nan)

## [1]  TRUE FALSE FALSE
is.na(vector_with_nan)

## [1]  TRUE FALSE FALSE

is.nan(vector_with_na)

## [1] FALSE FALSE FALSE

Lists

x <- list(1.5, 1L, "A")
typeof(x)

## [1] "list"

x

## [[1]]
## [1] 1.5
## 
## [[2]]
## [1] 1
## 
## [[3]]
## [1] "A"
x[2]

## [[1]]
## [1] 1

One-Indexing

NA values

Matrices

my_matrix <- matrix(nrow = 4, ncol = 3)
my_matrix

##      [,1] [,2] [,3]
## [1,]   NA   NA   NA
## [2,]   NA   NA   NA
## [3,]   NA   NA   NA
## [4,]   NA   NA   NA

class(my_matrix)

## [1] "matrix" "array"
# Fill individual cells by [row, column] index; cells that are not assigned
# keep their initial NA value.
my_matrix[1, 1] <- 1.1
my_matrix[2, 1] <- 2.1
my_matrix[2, 2] <- 2.2
my_matrix

##      [,1] [,2] [,3]
## [1,]  1.1   NA   NA
## [2,]  2.1  2.2   NA
## [3,]   NA   NA   NA
## [4,]   NA   NA   NA
my_matrix <- matrix(1:12, nrow = 4, ncol = 3)
my_matrix

##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
dim(my_matrix)

## [1] 4 3
# 1:12 already yields an integer vector, so wrapping it in c() is redundant.
my_vector <- 1:12
my_vector

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12

my_matrix <- my_vector
dim(my_matrix) <- c(4, 3)
my_matrix

##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
my_matrix <- cbind(1:3, 4:6)
my_matrix

##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
my_matrix <- rbind(1:3, 4:6)
my_matrix

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6

Matrix Multiplication

my_matrix <- matrix(1:6, 3, 2)
my_matrix

##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6

my_matrix * my_matrix

##      [,1] [,2]
## [1,]    1   16
## [2,]    4   25
## [3,]    9   36
t(my_matrix) %*% my_matrix

##      [,1] [,2]
## [1,]   14   32
## [2,]   32   77

Other Handy Matrix Functions

my_matrix <- matrix(1:6, 3, 2)
my_matrix

##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6

rowSums(my_matrix)

## [1] 5 7 9

Factors

x <- factor(c('great', 'good', 'bad', 'ok', 'good', 'good', 'great'))
x

## [1] great good  bad   ok    good  good  great
## Levels: bad good great ok
table(x)

## x
##   bad  good great    ok 
##     1     3     2     1
x <- factor(c('great', 'good', 'bad', 'ok', 'good', 'good', 'great'),
            levels=c('great', 'good', 'ok', 'bad'))
x

## [1] great good  bad   ok    good  good  great
## Levels: great good ok bad

table(x)

## x
## great  good    ok   bad 
##     2     3     1     1

Data Frames

# Build a data frame with one integer, one character, and one logical column.
# Spell out TRUE/FALSE rather than T/F: T and F are ordinary variables that
# can be reassigned, whereas TRUE and FALSE are reserved words.
df <- data.frame(
  MyIntegerVar = 1:4,
  MyCharVar = c("A", "B", "A", "B"),
  MyBoolVar = c(TRUE, FALSE, TRUE, TRUE)
)
df

##   MyIntegerVar MyCharVar MyBoolVar
## 1            1         A      TRUE
## 2            2         B     FALSE
## 3            3         A      TRUE
## 4            4         B      TRUE
nrow(df)

## [1] 4

ncol(df)

## [1] 3
names(df)

## [1] "MyIntegerVar" "MyCharVar"    "MyBoolVar"

names(df) <- c("A", "B", "C")
df

##   A B     C
## 1 1 A  TRUE
## 2 2 B FALSE
## 3 3 A  TRUE
## 4 4 B  TRUE
df <- data.frame(MyIntegerVar1 = 1:4, MyIntegerVar2 = 5:8)
df

##   MyIntegerVar1 MyIntegerVar2
## 1             1             5
## 2             2             6
## 3             3             7
## 4             4             8

colSums(df)

## MyIntegerVar1 MyIntegerVar2 
##            10            26

colMeans(df)

## MyIntegerVar1 MyIntegerVar2 
##           2.5           6.5
rowSums(df)

## [1]  6  8 10 12

rowMeans(df)

## [1] 3 4 5 6

Tables

A very useful data structure in R is the table, created via the table function. A table provides us with counts for each unique element in a vector. For example:

c

# NOTE: naming this vector `letters` masks R's built-in `letters` constant
# (the lowercase alphabet) in the current environment.
letters <- c("a", "b", "a", "a", "b", "b", "a", "d", "f", "f", "f")
# table() counts how often each unique value occurs in the vector.
table(letters)

## letters
## a b d f 
## 4 3 1 3

We can also use tables to extract the count for a specific item using the square-bracket selection syntax (more on that in chapter 03):

numbers <- c(1, 2, 1, 1, 2, 2, 1, 4, 6, 6, 6)
t <- table(numbers)