Heatmap
By Xiaochi Liu in R programming
library(tidyverse)
library(tidyquant)
sales_by_customer_tbl <- read_rds("sales_by_customer.rds")
sales_by_customer_tbl
## # A tibble: 270 × 3
## bikeshop_name product_category quantity
## <fct> <chr> <dbl>
## 1 Albuquerque Cycles Cross Country Race 48
## 2 Albuquerque Cycles Fat Bike 9
## 3 Albuquerque Cycles Over Mountain 13
## 4 Albuquerque Cycles Sport 35
## 5 Albuquerque Cycles Trail 38
## 6 Albuquerque Cycles Cyclocross 7
## 7 Albuquerque Cycles Elite Road 69
## 8 Albuquerque Cycles Endurance Road 54
## 9 Albuquerque Cycles Triathalon 13
## 10 Ann Arbor Speed Cross Country Race 32
## # … with 260 more rows
Data Wrangle
sales_by_customer_tbl %>%
group_by(bikeshop_name) %>%
mutate(prop = quantity / sum(quantity)) %>%
ungroup() %>%
select(- quantity) %>%
# AVOID THIS ROOKIE MISTAKE - DON'T FORGET TO SORT BY TOP PRODUCT ----
pivot_wider(id_cols = bikeshop_name,
names_from = product_category,
values_from = prop) %>%
arrange(- `Elite Road`) %>%
mutate(bikeshop_name = fct_reorder(bikeshop_name, `Elite Road`)) %>%
pivot_longer(cols = - bikeshop_name,
names_to = "product_category",
values_to = "prop") -> prop_sales_by_customer_tbl
prop_sales_by_customer_tbl
## # A tibble: 270 × 3
## bikeshop_name product_category prop
## <fct> <chr> <dbl>
## 1 Indianapolis Velocipedes Cross Country Race 0.103
## 2 Indianapolis Velocipedes Fat Bike 0.0125
## 3 Indianapolis Velocipedes Over Mountain 0.0125
## 4 Indianapolis Velocipedes Sport 0.116
## 5 Indianapolis Velocipedes Trail 0.0408
## 6 Indianapolis Velocipedes Cyclocross 0.0376
## 7 Indianapolis Velocipedes Elite Road 0.376
## 8 Indianapolis Velocipedes Endurance Road 0.241
## 9 Indianapolis Velocipedes Triathalon 0.0596
## 10 Austin Cruisers Cross Country Race 0.0854
## # … with 260 more rows
Heatmap Visualization
prop_sales_by_customer_tbl %>%
ggplot(aes(product_category, bikeshop_name)) +
# Geometirex
geom_tile(aes(fill = prop)) +
geom_text(aes(label = scales::percent(prop, accuracy = 1)), size = 3) +
# Formatting
scale_fill_gradient(low = "white", high = palette_light()[1]) +
labs(title = "Heatmap of Customer Purchasing Habits",
subtitle = "Used to investigate Customer Similarity",
x = "Bike Type (Product Category)",
y = "Bikeshop (Customer)") +
theme_tq() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none",
plot.title = element_text(face = "bold"),
plot.subtitle = element_text(face = "bold.italic"))