Example 1: Anscombe Dataset

x1 x2 x3 x4 y1 y2 y3 y4
10 10 10 8 8.04 9.14 7.46 6.58
8 8 8 8 6.95 8.14 6.77 5.76
13 13 13 8 7.58 8.74 12.74 7.71
9 9 9 8 8.81 8.77 7.11 8.84
11 11 11 8 8.33 9.26 7.81 8.47
14 14 14 8 9.96 8.10 8.84 7.04
6 6 6 8 7.24 6.13 6.08 5.25
4 4 4 19 4.26 3.10 5.39 12.50
12 12 12 8 10.84 9.13 8.15 5.56
7 7 7 8 4.82 7.26 6.42 7.91
5 5 5 8 5.68 4.74 5.73 6.89
?anscombe
# Interleave the columns so that every x sits next to its matching y.
DataX <- anscombe[, c("x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4")]
summary(DataX)
##        x1             y1               x2             y2       
##  Min.   : 4.0   Min.   : 4.260   Min.   : 4.0   Min.   :3.100  
##  1st Qu.: 6.5   1st Qu.: 6.315   1st Qu.: 6.5   1st Qu.:6.695  
##  Median : 9.0   Median : 7.580   Median : 9.0   Median :8.140  
##  Mean   : 9.0   Mean   : 7.501   Mean   : 9.0   Mean   :7.501  
##  3rd Qu.:11.5   3rd Qu.: 8.570   3rd Qu.:11.5   3rd Qu.:8.950  
##  Max.   :14.0   Max.   :10.840   Max.   :14.0   Max.   :9.260  
##        x3             y3              x4           y4        
##  Min.   : 4.0   Min.   : 5.39   Min.   : 8   Min.   : 5.250  
##  1st Qu.: 6.5   1st Qu.: 6.25   1st Qu.: 8   1st Qu.: 6.170  
##  Median : 9.0   Median : 7.11   Median : 8   Median : 7.040  
##  Mean   : 9.0   Mean   : 7.50   Mean   : 9   Mean   : 7.501  
##  3rd Qu.:11.5   3rd Qu.: 7.98   3rd Qu.: 8   3rd Qu.: 8.190  
##  Max.   :14.0   Max.   :12.74   Max.   :19   Max.   :12.500

Mean and Standard Deviation

# Per-column mean and standard deviation, each rounded to two decimals and
# stacked into a two-row matrix (rows "Mean" and "Sd") for display.
tmp <- rbind(
  Mean = round(vapply(DataX, mean, numeric(1)), 2),
  Sd   = round(vapply(DataX, sd, numeric(1)), 2)
)
knitr::kable(tmp, digits = 2, caption = 'Anscombe Dataset')
Anscombe Dataset
x1 y1 x2 y2 x3 y3 x4 y4
Mean 9.00 7.50 9.00 7.50 9.00 7.50 9.00 7.50
Sd 3.32 2.03 3.32 2.03 3.32 2.03 3.32 2.03

Correlation

# Pearson correlation of each x/y pair, rounded to two decimals.
with(DataX, round(cor(x1, y1), digits = 2))
## [1] 0.82
with(DataX, round(cor(x2, y2), digits = 2))
## [1] 0.82
with(DataX, round(cor(x3, y3), digits = 2))
## [1] 0.82
with(DataX, round(cor(x4, y4), digits = 2))
## [1] 0.82

Scatter Plots

Example 2: Iris Dataset

?iris
# Work on a copy of the built-in iris data set.
DataX <- iris
dim(DataX)       # nrow, ncol
## [1] 150   5
head(DataX, 10)  # first 10 rows; also: tail
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1           5.1         3.5          1.4         0.2  setosa
## 2           4.9         3.0          1.4         0.2  setosa
## 3           4.7         3.2          1.3         0.2  setosa
## 4           4.6         3.1          1.5         0.2  setosa
## 5           5.0         3.6          1.4         0.2  setosa
## 6           5.4         3.9          1.7         0.4  setosa
## 7           4.6         3.4          1.4         0.3  setosa
## 8           5.0         3.4          1.5         0.2  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 10          4.9         3.1          1.5         0.1  setosa
# Factor levels of the Species column.
levels(DataX[["Species"]])
## [1] "setosa"     "versicolor" "virginica"
# Per-column summary of the whole data frame.
summary(DataX)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 

Continuous univariate variable

?hist
# Draw the four histogram variants in a 2 x 2 grid of panels.
par(mfrow = c(2, 2))
hist(DataX$Sepal.Length,
     main = 'Histogram (default)', xlab = 'Iris Sepal.Length', ylab = 'Count')   # title, xlab, ylab
hist(DataX$Sepal.Length, breaks = 20,
     main = 'Histogram (more bins)', xlab = 'Iris Sepal.Length', ylab = 'Count')   # number of bins
# Spell out FALSE: F is an ordinary variable that can be reassigned.
hist(DataX$Sepal.Length, breaks = 20, freq = FALSE,
     main = 'Histogram (count vs. density)', xlab = 'Iris Sepal.Length', ylab = 'Density') # freq = FALSE, density
hist(DataX$Sepal.Length, breaks = 20, freq = FALSE, col = 5,
     main = 'Histogram (coloring)', xlab = 'Iris Sepal.Length', ylab = 'Density') # coloring

?density
# Back to a single panel for the combined plot.
par(mfrow = c(1, 1))
# freq = FALSE puts the histogram on the density scale so the kernel density
# curve can be overlaid on the same axis. FALSE is spelled out because F is
# an ordinary variable that can be reassigned.
hist(DataX$Sepal.Length, breaks = 20, freq = FALSE,
     xlab = 'Iris Sepal.Length', ylab = 'Density', main = '')
lines(density(DataX$Sepal.Length), col = 2, lty = 2, lwd = 2)
title(main = 'Histogram with Kernel Density Plot')

?boxplot
# Four boxplot variants in a 2 x 2 grid of panels.
par(mfrow = c(2, 2))
# A single variable.
boxplot(DataX$Sepal.Length, main = 'Boxplot of Sepal.Length')
# A variable whose boxplot shows outliers.
boxplot(DataX$Sepal.Width, main = 'Boxplot of Sepal.Width')
# The four numeric columns side by side, one colour per column.
boxplot(DataX[, 1:4], col = 2:5, main = 'Side-by-side Boxplot')
# One box per species, using a formula to group.
boxplot(DataX$Sepal.Width ~ DataX$Species, col = 5:7,
        main = "Boxplot with Grouping")