|
@@ -0,0 +1,863 @@
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Global knitr chunk defaults: write figures under Figs/ and hide the code,
# warnings and messages in the rendered report.
knitr::opts_chunk$set(
  fig.path = "Figs/",
  echo = FALSE,
  warning = FALSE,
  message = FALSE
)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+library(ggplot2)
|
|
|
+library(gridExtra)
|
|
|
+library(GGally)
|
|
|
+library(ggthemes)
|
|
|
+library(dplyr)
|
|
|
+library(memisc)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Read the red-wine data set from the working directory, then print its
# structure and a per-column summary.
wine <- read.csv(file = "wineQualityReds.csv")
str(wine)
summary(wine)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Store quality as an ordered factor over the 0-10 scale.
# NOTE: because the levels start at 0, as.numeric() on this column returns the
# level codes (score + 1); convert through as.character() to recover the score.
wine$quality <- factor(wine$quality, levels = 0:10, ordered = TRUE)

# Derived feature: sum of the fixed and volatile acidity columns.
wine[["total.acidity"]] <-
  wine[["fixed.acidity"]] + wine[["volatile.acidity"]]
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Univariate overview: one histogram per variable on a three-column grid.
# All panels share one template (black bin outlines, dark red fill); each panel
# sets its own bin width, axis breaks and visible x-range.
g_base <- ggplot(wine, aes(color = I("black"), fill = I("#990000")))

g1 <- g_base +
  geom_histogram(aes(x = fixed.acidity), binwidth = 0.25) +
  coord_cartesian(xlim = c(4, 16)) +
  scale_x_continuous(breaks = seq(4, 16, by = 2))

g2 <- g_base +
  geom_histogram(aes(x = volatile.acidity), binwidth = 0.05) +
  coord_cartesian(xlim = c(0, 2)) +
  scale_x_continuous(breaks = seq(0, 2, by = 0.5))

g3 <- g_base +
  geom_histogram(aes(x = total.acidity), binwidth = 0.25) +
  coord_cartesian(xlim = c(4, 18)) +
  scale_x_continuous(breaks = seq(0, 18, by = 1))

g4 <- g_base +
  geom_histogram(aes(x = citric.acid), binwidth = 0.05) +
  coord_cartesian(xlim = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.2))

g5 <- g_base +
  geom_histogram(aes(x = residual.sugar), binwidth = 0.5) +
  coord_cartesian(xlim = c(0, 16)) +
  scale_x_continuous(breaks = seq(0, 16, by = 2))

g6 <- g_base +
  geom_histogram(aes(x = chlorides), binwidth = 0.01) +
  coord_cartesian(xlim = c(0, 0.75)) +
  scale_x_continuous(breaks = seq(0, 0.75, by = 0.25))

g7 <- g_base +
  geom_histogram(aes(x = free.sulfur.dioxide), binwidth = 2.5) +
  coord_cartesian(xlim = c(0, 75)) +
  scale_x_continuous(breaks = seq(0, 75, by = 25))

g8 <- g_base +
  geom_histogram(aes(x = total.sulfur.dioxide), binwidth = 10) +
  coord_cartesian(xlim = c(0, 295)) +
  scale_x_continuous(breaks = seq(0, 300, by = 100))

g9 <- g_base +
  geom_histogram(aes(x = density), binwidth = 0.0005) +
  coord_cartesian(xlim = c(0.99, 1.005)) +
  scale_x_continuous(breaks = seq(0.99, 1.005, by = 0.005))

g10 <- g_base +
  geom_histogram(aes(x = pH), binwidth = 0.05) +
  coord_cartesian(xlim = c(2.5, 4.5)) +
  scale_x_continuous(breaks = seq(2.5, 4.5, by = 0.5))

g11 <- g_base +
  geom_histogram(aes(x = sulphates), binwidth = 0.05) +
  coord_cartesian(xlim = c(0, 2)) +
  scale_x_continuous(breaks = seq(0, 2, by = 0.5))

g12 <- g_base +
  geom_histogram(aes(x = alcohol), binwidth = 0.25) +
  coord_cartesian(xlim = c(8, 15)) +
  scale_x_continuous(breaks = seq(8, 15, by = 2))

grid.arrange(
  g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, g11, g12,
  ncol = 3
)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Shared histogram template: black bin outlines with a dark red fill.
# Later chunks add a geom_histogram() layer on top of this object.
base_hist <- ggplot(wine, aes(color = I("black"), fill = I("#990000")))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Fixed and volatile acidity histograms, stacked in two rows.
ac1 <- base_hist +
  geom_histogram(aes(x = fixed.acidity), binwidth = 0.25) +
  coord_cartesian(xlim = c(4, 16)) +
  scale_x_continuous(breaks = seq(4, 16, by = 2))

ac2 <- base_hist +
  geom_histogram(aes(x = volatile.acidity), binwidth = 0.05) +
  coord_cartesian(xlim = c(0, 2)) +
  scale_x_continuous(breaks = seq(0, 2, by = 0.5))

grid.arrange(ac1, ac2, nrow = 2)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Fixed, volatile and total acidity histograms in three rows, followed by the
# summary statistics of each acidity column.
ac1 <- base_hist +
  geom_histogram(aes(x = fixed.acidity), binwidth = 0.25) +
  coord_cartesian(xlim = c(4, 16)) +
  scale_x_continuous(breaks = seq(4, 16, by = 2))

ac2 <- base_hist +
  geom_histogram(aes(x = volatile.acidity), binwidth = 0.05) +
  coord_cartesian(xlim = c(0, 2)) +
  scale_x_continuous(breaks = seq(0, 2, by = 0.5))

ac3 <- base_hist +
  geom_histogram(aes(x = total.acidity), binwidth = 0.25) +
  coord_cartesian(xlim = c(0, 18)) +
  scale_x_continuous(breaks = seq(0, 18, by = 2))

grid.arrange(ac1, ac2, ac3, nrow = 3)

print("Summary statistics of Fixed Acidity")
summary(wine[["fixed.acidity"]])
print("Summary statistics of Volatile Acidity")
summary(wine[["volatile.acidity"]])
print("Summary statistics of Total Acidity")
summary(wine[["total.acidity"]])
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Citric acid histogram, its summary statistics, and a TRUE/FALSE count of
# observations whose citric acid value is exactly zero.
base_hist +
  geom_histogram(aes(x = citric.acid), binwidth = 0.05) +
  coord_cartesian(xlim = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.2))

print("Summary statistics of Citric Acid")
summary(wine[["citric.acid"]])
print('Number of Zero Values')
table(wine[["citric.acid"]] == 0)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Sulfur-related variables: raw histogram (left) next to its log10-transformed
# counterpart (right), one variable per row. Default binning is used.
sul1 <- base_hist +
  geom_histogram(aes(x = free.sulfur.dioxide))
sul2 <- base_hist +
  geom_histogram(aes(x = log10(free.sulfur.dioxide)))

sul3 <- base_hist +
  geom_histogram(aes(x = total.sulfur.dioxide))
sul4 <- base_hist +
  geom_histogram(aes(x = log10(total.sulfur.dioxide)))

sul5 <- base_hist +
  geom_histogram(aes(x = sulphates))
sul6 <- base_hist +
  geom_histogram(aes(x = log10(sulphates)))

grid.arrange(sul1, sul2, sul3, sul4, sul5, sul6, nrow = 3)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Alcohol histogram (0.25-wide bins, x-range 8-15) and its summary statistics.
base_hist +
  geom_histogram(aes(x = alcohol), binwidth = 0.25) +
  coord_cartesian(xlim = c(8, 15)) +
  scale_x_continuous(breaks = seq(8, 15, by = 2))

print("Summary statistics for alcohol %age.")
summary(wine[["alcohol"]])
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Bar chart of the quality factor, then the per-level counts.
qplot(
  x = quality,
  data = wine,
  geom = "bar",
  fill = I("#990000"),
  col = I("black")
)

print("Summary statistics - Wine Quality.")
summary(wine[["quality"]])
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Bucket quality into three ordered ratings:
#   quality below 5 -> "C", quality 5 or 6 -> "B", quality 7 and above -> "A".
# The comparisons rely on quality being an ordered factor.
wine$rating <- factor(
  ifelse(
    wine$quality >= 7, "A",
    ifelse(wine$quality >= 5, "B", "C")
  ),
  levels = c("C", "B", "A"),
  ordered = TRUE
)

summary(wine[["rating"]])
|
|
|
+
|
|
|
# Side-by-side bar charts: quality scores filled by rating (left) and the
# rating counts (right).
#
# quality is an ordered factor whose levels start at 0, so as.numeric(quality)
# would return the level codes (score + 1) and shift every bar one unit to the
# right of the axis breaks. Converting through as.character() recovers the
# actual score.
qr1 <- ggplot(
  wine,
  aes(x = as.numeric(as.character(quality)), fill = rating)
) +
  geom_bar() +
  ggtitle("Barchart of Quality with Rating") +
  scale_x_continuous(breaks = seq(3, 8, 1)) +
  xlab("Quality") +
  theme_pander() +
  # The bars are coloured through the fill aesthetic, so the Few palette has
  # to be applied to fill; a colour scale has no effect on this plot.
  scale_fill_few()

qr2 <- qplot(x = rating, data = wine, geom = "bar",
             fill = I("#990000"),
             col = I("black")) +
  xlab("Rating") +
  ggtitle("Barchart of Rating") +
  theme_pander()

grid.arrange(qr1, qr2, ncol = 2)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Per-column summaries restricted to the best ("A") and worst ("C") ratings.
print("Summary statistics of Wine with Rating 'A'")
summary(subset(wine, subset = rating == "A"))

print("Summary statistics of Wine with Rating 'C'")
summary(subset(wine, subset = rating == "C"))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Correlation matrix of the wine data frame, on a blue-white-red gradient.
ggcorr(
  wine,
  size = 2.2,
  hjust = 0.8,
  low = "#4682B4",
  mid = "white",
  high = "#E74C3C"
)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Total acidity by rating (top) and by quality (bottom). The y-axis is zoomed
# to the 99th percentile of total acidity; the red point marks each group mean.
aq1 <- ggplot(wine, aes(x = rating, y = total.acidity)) +
  geom_boxplot(fill = "#ffeeee") +
  geom_point(stat = "summary", fun.y = mean, color = "red") +
  coord_cartesian(ylim = c(0, quantile(wine$total.acidity, 0.99))) +
  labs(x = "Rating", y = "Total Acidity")

# Same view per quality level, with the raw observations jittered on top.
aq2 <- ggplot(wine, aes(x = quality, y = total.acidity)) +
  geom_boxplot(fill = "#ffeeee") +
  geom_point(stat = "summary", fun.y = mean, color = "red") +
  geom_jitter(alpha = 1 / 10, color = "#990000") +
  coord_cartesian(ylim = c(0, quantile(wine$total.acidity, 0.99))) +
  labs(x = "Quality", y = "Total Acidity") +
  ggtitle("\n")

grid.arrange(aq1, aq2, ncol = 1)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Overlaid, nearly transparent density curves of volatile acidity, one per
# quality level (both outline and fill follow quality).
ggplot(
  wine,
  aes(x = volatile.acidity, fill = quality, color = quality)
) +
  geom_density(alpha = 0.08)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Alcohol vs. quality scatter with a smoother.
# quality is an ordered factor whose levels start at 0, so as.numeric(quality)
# would yield level codes (score + 1); go through as.character() to plot the
# real score on the y-axis.
qas0 <- ggplot(
  wine,
  aes(x = alcohol, y = as.numeric(as.character(quality)))
) +
  geom_jitter(alpha = 1 / 12) +
  geom_smooth() +
  ggtitle("Alcohol Content vs. Quality") +
  ylab("Quality") + xlab("Alcohol")

# Density of alcohol content, one facet per quality level.
qas1 <- ggplot(wine, aes(x = alcohol)) +
  geom_density(fill = I("#BB0000")) +
  facet_wrap("quality") +
  ggtitle("Alcohol Content for \nWine Quality Ratings") +
  ylab("Density") + xlab("Alcohol")

# Alcohol against residual sugar with a smoother.
qas2 <- ggplot(wine, aes(x = residual.sugar, y = alcohol)) +
  geom_jitter(alpha = 1 / 12) +
  geom_smooth() +
  ggtitle("Alcohol vs. Residual Sugar Content") +
  ylab("Alcohol") + xlab("Residual Sugar")

# Facets on the left, the two scatter plots stacked on the right.
grid.arrange(qas1, arrangeGrob(qas0, qas2), ncol = 2)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Aggregate the wines by alcohol value: per distinct alcohol level, compute
# the mean volatile acidity, pH, sulphates and quality score plus the group
# size.
quality_groups <- group_by(wine, alcohol)

wine.quality_groups <- summarize(
  quality_groups,
  acidity_mean = mean(volatile.acidity),
  pH_mean = mean(pH),
  sulphates_mean = mean(sulphates),
  # quality is an ordered factor whose levels start at 0; as.numeric() alone
  # would average the level codes (score + 1), so convert via as.character().
  qmean = mean(as.numeric(as.character(quality))),
  n = n()
)

wine.quality_groups <- arrange(wine.quality_groups, alcohol)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Box plot of alcohol content for each quality level.
ggplot(wine, aes(x = factor(quality), y = alcohol)) +
  geom_boxplot(fill = "#ffeeee") +
  xlab("quality")
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Smoothed trend of the per-alcohol-level mean quality (qmean) against alcohol.
ggplot(wine.quality_groups, aes(x = alcohol, y = qmean)) +
  geom_smooth() +
  scale_x_continuous(breaks = seq(0, 15, by = 0.5)) +
  labs(x = "Alcohol %", y = "Quality Mean")
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Box plot of sulphates for each quality level.
ggplot(wine, aes(x = quality, y = sulphates)) +
  geom_boxplot(fill = "#ffeeee")
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Sulphates vs. quality, for all wines (top) and for wines with sulphates
# below 1 only (bottom).
#
# quality is an ordered factor whose levels start at 0, so as.numeric(quality)
# would plot the level codes (score + 1). Converting through as.character()
# puts the real score on the y-axis.
sq1 <- ggplot(
  wine,
  aes(x = sulphates, y = as.numeric(as.character(quality)))
) +
  geom_jitter(alpha = 1 / 10) +
  geom_smooth() +
  xlab("Sulphates") + ylab("Quality") +
  ggtitle("Sulphates vs. Quality")

sq2 <- ggplot(
  subset(wine, sulphates < 1),
  aes(x = sulphates, y = as.numeric(as.character(quality)))
) +
  geom_jitter(alpha = 1 / 10) +
  geom_smooth() +
  xlab("Sulphates") + ylab("Quality") +
  ggtitle("\nSulphates vs Quality without Outliers")

grid.arrange(sq1, sq2, nrow = 2)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Volatile acidity against alcohol, coloured by quality on a sequential Brewer
# palette, with a linear fit (no confidence band).
ggplot(
  data = wine,
  mapping = aes(x = alcohol, y = volatile.acidity, color = quality)
) +
  geom_jitter(position = position_jitter(), alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, size = 1) +
  scale_color_brewer(
    type = "seq",
    guide = guide_legend(title = "Quality")
  ) +
  theme_pander()
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Citric acid against volatile acidity for the extreme ratings only ("A" and
# "C"), coloured by rating. Dashed reference lines sit at volatile acidity 0.6
# and citric acid 0.5.
#
# Only the jittered, rating-coloured layer is drawn. The previous extra
# geom_point() layer plotted every observation a second time as an uncoloured
# point at its exact position, which doubled the marks and obscured the rating
# colours.
ggplot(
  subset(wine, rating == "A" | rating == "C"),
  aes(x = volatile.acidity, y = citric.acid)
) +
  geom_jitter(position = position_jitter(), aes(color = rating)) +
  geom_vline(xintercept = c(0.6), linetype = "dashed", size = 1,
             color = "black") +
  geom_hline(yintercept = c(0.5), linetype = "dashed", size = 1,
             color = "black") +
  scale_x_continuous(breaks = seq(0, 1.6, .1)) +
  theme_pander() + scale_colour_few()
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Sulphates against alcohol for the extreme ratings only ("A" and "C"),
# coloured by rating, with a dashed reference line at sulphates 0.65.
ggplot(
  data = subset(wine, rating == "A" | rating == "C"),
  mapping = aes(x = alcohol, y = sulphates)
) +
  geom_jitter(aes(color = rating), position = position_jitter()) +
  geom_hline(
    yintercept = c(0.65),
    linetype = "dashed",
    size = 1,
    color = "black"
  ) +
  scale_y_continuous(breaks = seq(0, 2, by = .2)) +
  scale_colour_few() +
  theme_pander()
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Density against total acidity (top) and against residual sugar (bottom),
# both coloured by the numeric quality score.
#
# Fixes relative to the previous version:
#   * da1 mapped density to x and total acidity to y while labelling the axes
#     the other way round; the mapping now matches the labels and the title
#     (density on y), which is also the orientation cs2 uses.
#   * cs2's title mentioned chlorides although the plot shows density.
#   * quality is an ordered factor whose levels start at 0, so
#     as.numeric(quality) gave level codes (score + 1); the colour scale now
#     uses the real score via as.character().
da1 <- ggplot(
  wine,
  aes(x = total.acidity, y = density,
      color = as.numeric(as.character(quality)))
) +
  geom_point(position = "jitter") +
  geom_smooth() +
  labs(x = "Total Acidity", y = "Density", color = "Quality") +
  ggtitle("Density vs. Acidity Colored by Wine Quality Ratings")

cs2 <- ggplot(
  wine,
  aes(x = residual.sugar, y = density,
      color = as.numeric(as.character(quality)))
) +
  geom_point(position = "jitter") +
  geom_smooth() +
  labs(x = "Residual Sugar", y = "Density", color = "Quality") +
  ggtitle("\nDensity vs. Sugar Colored by Wine Quality Ratings")

grid.arrange(da1, cs2)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Final plot one: quality scores filled by rating (left) next to the rating
# counts (right).
#
# quality is an ordered factor whose levels start at 0, so as.numeric(quality)
# would return the level codes (score + 1) and shift every bar one unit to the
# right of the axis breaks. Converting through as.character() recovers the
# actual score.
qr1 <- ggplot(
  wine,
  aes(x = as.numeric(as.character(quality)), fill = rating)
) +
  geom_bar() +
  ggtitle("Barchart of Quality with Rating") +
  scale_x_continuous(breaks = seq(3, 8, 1)) +
  xlab("Quality") +
  theme_pander() +
  # The bars are coloured through the fill aesthetic, so the Few palette has
  # to be applied to fill; a colour scale has no effect on this plot.
  scale_fill_few()

qr2 <- qplot(x = rating, data = wine, geom = "bar",
             fill = I("#990000"),
             col = I("black")) +
  xlab("Rating") +
  ggtitle("Barchart of Rating") +
  theme_pander()

grid.arrange(qr1, qr2, ncol = 2)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Alcohol density, one facet per quality level, with a plain-weight title and
# size-10 axis titles. The y-axis title is intentionally blank.
ggplot(data = wine, mapping = aes(x = alcohol)) +
  geom_density(fill = I("#BB0000")) +
  facet_wrap("quality") +
  labs(x = "Alcohol [%age]", y = "") +
  ggtitle("Alcohol Content for Wine Quality Ratings") +
  theme(
    plot.title = element_text(face = "plain"),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10)
  )
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Final plot two: alcohol by quality as a box plot (top) and the smoothed
# mean quality per alcohol level (bottom).
#
# The axis titles of fp1 are set once through labs(). The earlier xlab()/ylab()
# calls were dead code: labs() further down the chain overrode both of them,
# including the conflicting "Alcohol in % by Volume" y-axis title.
fp1 <- ggplot(wine, aes(x = quality, y = alcohol)) +
  geom_boxplot() +
  labs(x = "Quality", y = "Alcohol [%age]") +
  ggtitle("Boxplot of Alcohol and Quality") +
  theme(plot.title = element_text(face = "plain"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10))

# qmean is read from the wine.quality_groups summary table.
fp2 <- ggplot(wine.quality_groups, aes(x = alcohol, y = qmean)) +
  geom_smooth() +
  scale_x_continuous(breaks = seq(0, 15, 0.5)) +
  ggtitle("\nLine Plot of Quality Mean & Alcohol Percentage") +
  labs(x = "Alcohol [%age]", y = "Quality (Mean)") +
  theme(plot.title = element_text(face = "plain"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10))

grid.arrange(fp1, fp2)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Final plot three: the extreme ratings ("A" and "C") in acid space (top) and
# in alcohol/sulphates space (bottom), coloured by rating, with dashed
# reference lines.
#
# fp3 draws only the jittered, rating-coloured layer. The previous extra
# geom_point() layer plotted every observation a second time as an uncoloured
# point at its exact position, unlike fp4, which doubled the marks and obscured
# the rating colours.
fp3 <- ggplot(
  subset(wine, rating == "A" | rating == "C"),
  aes(x = volatile.acidity, y = citric.acid)
) +
  geom_jitter(position = position_jitter(), aes(color = rating)) +
  geom_vline(xintercept = c(0.6), linetype = "dashed", size = 1,
             color = "black") +
  geom_hline(yintercept = c(0.5), linetype = "dashed", size = 1,
             color = "black") +
  scale_x_continuous(breaks = seq(0, 1.6, .1)) +
  theme_pander() + scale_colour_few() +
  ggtitle("Wine Rating vs. Acids") +
  labs(x = "Volatile Acidity (g/dm^3)", y = "Citric Acid (g/dm^3)") +
  theme(plot.title = element_text(face = "plain"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        legend.title = element_text(size = 10))

fp4 <- ggplot(
  subset(wine, rating == "A" | rating == "C"),
  aes(x = alcohol, y = sulphates)
) +
  geom_jitter(position = position_jitter(), aes(color = rating)) +
  geom_hline(yintercept = c(0.65), linetype = "dashed", size = 1,
             color = "black") +
  theme_pander() + scale_colour_few() +
  scale_y_continuous(breaks = seq(0, 2, .2)) +
  ggtitle("\nSulphates, Alcohol & Wine-Rating") +
  labs(x = "Alcohol [%]", y = "Sulphates (g/dm^3)") +
  theme(plot.title = element_text(face = "plain"),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        legend.title = element_text(size = 10))

grid.arrange(fp3, fp4, nrow = 2)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|