Title: | Automated Linear Regression Diagnostic |
---|---|
Description: | Provides a set of streamlined functions that allow easy generation of the linear regression diagnostic plots necessary for checking linear model assumptions. This package is meant for easy plotting of linear regression diagnostics, while preserving the merits of "The Grammar of Graphics" as implemented in 'ggplot2'. See the 'ggplot2' website for more information regarding the specific capabilities of the graphics. |
Authors: | Yeuk Yu Lee [aut, cre], Samuel Ventura [aut] |
Maintainer: | Yeuk Yu Lee <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.10 |
Built: | 2024-11-22 03:35:48 UTC |
Source: | https://github.com/yeukyul/lindia |
Plot boxcox graph in ggplot with suggested lambda transformation
gg_boxcox(fitted.lm, showlambda = TRUE, lambdaSF = 3, scale.factor = 0.5)
gg_boxcox(fitted.lm, showlambda = TRUE, lambdaSF = 3, scale.factor = 0.5)
fitted.lm |
a fitted linear model (i.e. lm, glm) that contains fitted regression |
showlambda |
logical; controls whether lambda value should be displayed on graph. Defaults to TRUE |
lambdaSF |
numeric; controls how many significant figures lambda is rounded to. Defaults to 3. |
scale.factor |
numeric; scales the point size and linewidth to allow customized viewing. Defaults to 0.5. |
A ggplot object that contains boxcox graph
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_boxcox(cars_lm)
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_boxcox(cars_lm)
Plot cook's distance graph
gg_cooksd( fitted.lm, label = TRUE, show.threshold = TRUE, threshold = "convention", scale.factor = 0.5 )
gg_cooksd( fitted.lm, label = TRUE, show.threshold = TRUE, threshold = "convention", scale.factor = 0.5 )
fitted.lm |
a fitted linear model (i.e. lm, glm) that contains fitted regression |
label |
logical; whether or not to label, by observation number, the points whose Cook's distance is larger than the threshold. Defaults to TRUE. |
show.threshold |
logical; determine whether or not threshold line is to be shown. Default to TRUE. |
threshold |
string; determines the cut-off used for labeling Cook's distance. Choices are "baseR" (0.5 and 1), "matlab" (mean(cooksd)*3), and "convention" (4/n and 1). Defaults to "convention". |
scale.factor |
numeric; scales the point size and linewidth to allow customized viewing. Defaults to 0.5. |
A ggplot object that contains a cook's distance plot
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_cooksd(cars_lm)
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_cooksd(cars_lm)
Plot all diagnostic plots given fitted linear regression line.
gg_diagnose( fitted.lm, theme = NULL, ncol = NA, plot.all = TRUE, mode = "all", scale.factor = 0.5, boxcox = FALSE, max.per.page = NA )
gg_diagnose( fitted.lm, theme = NULL, ncol = NA, plot.all = TRUE, mode = "all", scale.factor = 0.5, boxcox = FALSE, max.per.page = NA )
fitted.lm |
lm object that contains fitted regression |
theme |
a ggplot graphing style, created using 'ggplot2::theme()', to apply to all plots. Defaults to NULL. |
ncol |
specify number of columns in resulting plot per page. Default to make a square matrix of the output. |
plot.all |
logical; determine whether plot will be returned as an arranged grid. When set to false, the function will return a list of diagnostic plots. Parameter defaults to TRUE. |
mode |
A string. Specifies which set of diagnostic plots to return: * 'all' (the default) * 'base_r': only graphs included in the base R 'plot(lm(...))' (i.e. residual vs fitted, QQ plot, scale location, residual vs leverage) |
scale.factor |
numeric; scales the point size, linewidth, labels in all diagnostic plots to allow optimal viewing. Defaults to 0.5. |
boxcox |
logical; determines whether the boxcox plot will be included. Parameter defaults to FALSE. |
max.per.page |
numeric; maximum number of plots allowed in one page. |
An arranged grid of linear model diagnostic plots in ggplot. If plot.all is set to FALSE, a list of ggplot objects will be returned instead. The names of the plots are set to the respective variable names.
library(MASS) data(Cars93) # a regression with categorical variable cars_lm <- lm(Price ~ Passengers + Length + RPM + Origin, data = Cars93) gg_diagnose(cars_lm) # customize which diagnostic plot is included plots <- gg_diagnose(cars_lm, plot.all = FALSE) names(plots) # get name of the plots exclude_plots <- plots[-c(1, 3) ] #exclude certain diagnostics plots include_plots <- plots[c(1, 3)] # include certain diagnostics plots plot_all(exclude_plots) # make use of plot_all() in lindia plot_all(include_plots)
library(MASS) data(Cars93) # a regression with categorical variable cars_lm <- lm(Price ~ Passengers + Length + RPM + Origin, data = Cars93) gg_diagnose(cars_lm) # customize which diagnostic plot is included plots <- gg_diagnose(cars_lm, plot.all = FALSE) names(plots) # get name of the plots exclude_plots <- plots[-c(1, 3) ] #exclude certain diagnostics plots include_plots <- plots[c(1, 3)] # include certain diagnostics plots plot_all(exclude_plots) # make use of plot_all() in lindia plot_all(include_plots)
Plot quantile-quantile plot (QQPlot) in ggplot with qqline shown.
gg_qqplot(fitted.lm, scale.factor = 1)
gg_qqplot(fitted.lm, scale.factor = 1)
fitted.lm |
a fitted linear model (i.e. lm, glm) that contains fitted regression |
scale.factor |
numeric; scales the point size and linewidth to allow customized viewing. Defaults to 1. |
A qqplot with fitted qqline
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_qqplot(cars_lm)
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_qqplot(cars_lm)
Generate residual plot of residuals against fitted value
gg_resfitted(fitted.lm, scale.factor = 1)
gg_resfitted(fitted.lm, scale.factor = 1)
fitted.lm |
a fitted linear model (i.e. lm, glm) that contains fitted regression |
scale.factor |
numeric; scales the point size and linewidth to allow customized viewing. Defaults to 1. |
A ggplot object
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_resfitted(cars_lm)
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_resfitted(cars_lm)
Generate histogram of residuals in ggplot.
gg_reshist(fitted.lm, bins = NULL)
gg_reshist(fitted.lm, bins = NULL)
fitted.lm |
a fitted linear model (i.e. lm, glm) that contains fitted regression |
bins |
number of bins for the histogram (e.g. bins = 20) |
A ggplot object
library(MASS) data(Cars93) cars_lm <- lm(Price~ Passengers + Length + RPM, data = Cars93) gg_reshist(cars_lm) # specify number of bins gg_reshist(cars_lm, bins = 20)
library(MASS) data(Cars93) cars_lm <- lm(Price~ Passengers + Length + RPM, data = Cars93) gg_reshist(cars_lm) # specify number of bins gg_reshist(cars_lm, bins = 20)
Plot residual versus leverage plot in ggplot.
gg_resleverage(fitted.lm, method = "loess", se = FALSE, scale.factor = 1)
gg_resleverage(fitted.lm, method = "loess", se = FALSE, scale.factor = 1)
fitted.lm |
a fitted linear model (i.e. lm, glm) that contains fitted regression |
method |
smoothing method of fitted line on residual vs. leverage plot. e.g. "lm", "glm", "gam", "loess", "rlm". See https://ggplot2.tidyverse.org/reference/geom_smooth.html for more details. |
se |
logical; determines whether the standard error (se) band should be drawn around the smoothed line |
scale.factor |
numeric; scales the point size and linewidth to allow customized viewing. Defaults to 1. |
A ggplot object that contains residual vs. leverage graph
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_resleverage(cars_lm)
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_resleverage(cars_lm)
Generate residual plot of residuals against predictors
gg_resX( fitted.lm, plot.all = TRUE, scale.factor = 0.5, max.per.page = NA, ncol = NA )
gg_resX( fitted.lm, plot.all = TRUE, scale.factor = 0.5, max.per.page = NA, ncol = NA )
fitted.lm |
a fitted linear model (i.e. lm, glm) that contains fitted regression |
plot.all |
logical; determines whether the plots will be returned as a single plot arranged using 'grid.arrange()'. When set to FALSE, the function returns a list of residual plots. Parameter defaults to TRUE. |
scale.factor |
numeric; scales the point size and linewidth to allow customized viewing. Defaults to 0.5. |
max.per.page |
numeric; maximum number of plots allowed in one page. Parameter defaults to fit all plots on one page. |
ncol |
specify number of columns in resulting plot per page. Default to make a square matrix of the output. |
An arranged grid of residuals-against-predictor plots in ggplot. If plot.all is set to FALSE, a list of ggplot objects will be returned instead. The names of the plots are set to the respective variable names.
library(MASS) data(Cars93) # a regression with categorical variable cars_lm <- lm(Price ~ Passengers + Length + RPM + Origin, data = Cars93) gg_resX(cars_lm) # customize which diagnostic plot is included by have gg_resX to return a list of plots plots <- gg_resX(cars_lm, plot.all = FALSE) names(plots) # get name of the plots exclude_plots <- plots[-1 ] #exclude certain residual plots include_plots <- plots[1] # include certain residual plots plot_all(exclude_plots) # make use of plot_all() in lindia plot_all(include_plots)
library(MASS) data(Cars93) # a regression with categorical variable cars_lm <- lm(Price ~ Passengers + Length + RPM + Origin, data = Cars93) gg_resX(cars_lm) # customize which diagnostic plot is included by have gg_resX to return a list of plots plots <- gg_resX(cars_lm, plot.all = FALSE) names(plots) # get name of the plots exclude_plots <- plots[-1 ] #exclude certain residual plots include_plots <- plots[1] # include certain residual plots plot_all(exclude_plots) # make use of plot_all() in lindia plot_all(include_plots)
Plot scale-location (also called spread-location plot) in ggplot.
gg_scalelocation(fitted.lm, method = "loess", scale.factor = 1, se = FALSE)
gg_scalelocation(fitted.lm, method = "loess", scale.factor = 1, se = FALSE)
fitted.lm |
a fitted linear model (i.e. lm, glm) that contains fitted regression |
method |
smoothing method of fitted line on scale-location plot. eg. "lm", "glm", "gam", "loess", "rlm". See https://ggplot2.tidyverse.org/reference/geom_smooth.html for more details. |
scale.factor |
numeric; scales the point size and linewidth to allow customized viewing. Defaults to 1. |
se |
logical; determines whether the standard error (se) band should be drawn around the smoothed line |
A ggplot object that contains scale-location graph
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_scalelocation(cars_lm)
library(MASS) data(Cars93) cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93) gg_scalelocation(cars_lm)
Plot all given plots in a square matrix form.
plot_all(plots, ncol = NA, max.per.page = NA)
plot_all(plots, ncol = NA, max.per.page = NA)
plots |
a list of plots |
ncol |
numeric; the number of columns the arranged grid should have. Defaults to fitting all plots in a square matrix. |
max.per.page |
numeric; maximum number of plots allowed in one page. |
The plots in the given list, arranged using gridExtra
library(MASS) data(Cars93) # a regression with categorical variable cars_lm <- lm(Price ~ Passengers + Length + RPM + Origin, data = Cars93) plots <- gg_diagnose(cars_lm, plot.all = FALSE) names(plots) selected.plots <- plots[-c(2, 5)] plot_all(selected.plots)
library(MASS) data(Cars93) # a regression with categorical variable cars_lm <- lm(Price ~ Passengers + Length + RPM + Origin, data = Cars93) plots <- gg_diagnose(cars_lm, plot.all = FALSE) names(plots) selected.plots <- plots[-c(2, 5)] plot_all(selected.plots)