library(dplyr)
library(dummies)
library(stringr)
library(ggplot2)
library(glmnet)
library(splines)
# Discard every warning for the rest of the session (a negative `warn` level
# makes R ignore warnings entirely).
# NOTE(review): this also hides genuine problems such as coercion or
# convergence warnings raised by later code — consider removing it or
# suppressing only the specific calls that are known to be noisy.
options(warn=-1) #suppress warnings

1 The Ma On Shan Housing Data

This session introduces regularization techniques using an example of shrinkage methods.

MOS_Data: The data set contains all transactions in Ma On Shan between September and November 2016.

It consists of 31 variables:

# Read the serialized transaction table from the working directory and
# list the names of its columns.
MOS_Data <- readRDS(file = "Ma_On_Shan_Transactions.Rds")
colnames(x = MOS_Data)
##  [1] "Name"              "Block"             "SellPrice"        
##  [4] "Floor"             "GFArea"            "NGRatio"          
##  [7] "Time"              "Direction"         "Month.Built"      
## [10] "Year.Built"        "Squash_facility"   "Table_Tennis"     
## [13] "Outdoor_Pool"      "Sauna"             "Pool_table"       
## [16] "THX_Theatre"       "Fitness_facility"  "Dancing_facility" 
## [19] "Kids_facility"     "Mall"              "Basketball_Court" 
## [22] "Tennis_Court"      "Garden"            "Jogging_Path"     
## [25] "Warm_Pool"         "MTR"               "Balcony"          
## [28] "Ceiling.Height"    "Bed.Rooms"         "Bath.Room"        
## [31] "Multipurpose.Room"

Create the log of the “selling price per square foot” variable:

# Selling price per square foot of gross floor area, and its natural log.
# NOTE(review): the factor 10000 presumably rescales SellPrice from units of
# 10,000 to single currency units — confirm against the data dictionary.
MOS_Data[["GFPrice"]] <- MOS_Data[["SellPrice"]] * 10000 / MOS_Data[["GFArea"]]
MOS_Data[["LGFPrice"]] <- log(MOS_Data[["GFPrice"]])

1.1 Exploratory Analysis

  • Obtain a horizontal bar plot of the average selling price per square foot per estate.
# Average price per square foot for each estate, sorted from the highest
# average to the lowest, returned as a plain data frame.
Mean_Price_per_Estate <- MOS_Data %>%
  group_by(Name) %>%
  summarise(mean = mean(GFPrice)) %>%
  arrange(desc(mean)) %>%
  as.data.frame()
head(Mean_Price_per_Estate)
##                    Name     mean
## 1 Sunshine City Phase 2 8119.103
## 2 Sunshine City Phase 1 7920.339
## 3           Lake Silver 7907.547
## 4 Sunshine City Phase 4 7877.253
## 5 Sunshine City Phase 3 7802.873
## 6       Bayshore Towers 7792.163
  • Visualize the estates in decreasing order of average price
# Horizontal bar chart of the mean price per square foot: one bar per estate,
# with the estates ordered by their mean price and coloured by name.
p <- ggplot(
  data = Mean_Price_per_Estate,
  aes(x = reorder(Name, mean), y = mean, fill = Name)
)
p +
  geom_col() +
  coord_flip() +  # put the estate names on the vertical axis
  labs(
    title = "Mean Prices per square foot per Estates in Ma On Shan",
    x = "Estates in Ma On Shan",
    y = "Mean Price per square foot"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

  • Visualize the correlations with a heatmap
# Correlation heatmap of the numeric variables in MOS_Data.
#
# cor() only accepts numeric input and stops with "'x' must be numeric"
# otherwise. Dropping just the first column by position removes the estate
# name but keeps any other non-numeric column, so the columns are selected by
# type instead. When every column other than the name is numeric the result
# is the same as before.
library(ggcorrplot)
numeric_cols <- vapply(MOS_Data, is.numeric, logical(1))
corr <- cor(MOS_Data[, numeric_cols, drop = FALSE])
ggcorrplot(corr)