Vedlegg 1 - R kode
Kapittel 2
Sette opp nødvendige pakker:
::p_load(ggplot2, readxl, tidyverse, ggpubr, dplyr, hrbrthemes) pacman
Lage fig 2.1:
<- as_tibble(read_excel("prepost_eksempel_long.xlsx"))
prepost_eksempel_long
ggbarplot(prepost_eksempel_long, x = "Periode", y = "Verdi", add = c("mean"), color = "blue", fill = "lightblue")
Lage fig 2.2:
ggline(prepost_eksempel_long, x = "Periode", y = "Verdi", add = c("mean_se", "jitter"))
Lage fig 2.3:
<- 1:24
t
<- c(prepost_eksempel_long$Verdi)
z
plot(t,z, type="l", col="blue", lwd=3, xlab="Periode", ylab="Antall", xaxt="n")
axis(1, seq(0,24,2))
abline(v=12, col="red", lwd = 3)
text(15.5, 40, "Endring i prosedyre", col = "red")
Kapittel 3
Lage fig 3.1:
# Genererer tilfeldig tall for regel 1:
::p_load(xlsx, ggplot2, tidyverse, ggpubr)
pacman
set.seed(91)
<- as_tibble(rnorm(100, mean = 0, sd = 1)) %>%
regel1_x rename(x = value) %>%
add_column(nr = 1:100) %>%
relocate(nr)
<- as_tibble(rnorm(100, mean = 0, sd = 1)) %>%
regel1_y rename(y = value) %>%
add_column(nr = 1:100) %>%
relocate(nr)
<- merge(regel1_x,regel1_y,by="nr")
regel1
<- ggplot(regel1, aes(x = x, y = y)) +
regel1_plot geom_point(size = 1) + labs(x = "x", y = "y", title = "Regel 1") + xlim(-5,5) + ylim(-5,5)+ geom_vline(xintercept = 0, col = "red") + geom_hline(yintercept = 0, col = "red")
# Genererer tilfeldig tall for regel 2:
set.seed(92)
<- as_tibble(rnorm(100, mean = 0, sd = 2)) %>%
regel2_x rename(x = value) %>%
add_column(nr = 1:100) %>%
relocate(nr)
## Setter sd = 2 fordi variasjonen med regel 2 er dobbel av regel 1 (jfr SPC for Excel)
<- as_tibble(rnorm(100, mean = 0, sd = 2)) %>%
regel2_y rename(y = value) %>%
add_column(nr = 1:100) %>%
relocate(nr)
<- merge(regel2_x,regel2_y,by="nr")
regel2
<- ggplot(regel2, aes(x = x, y = y)) +
regel2_plot geom_point(size = 1) + labs(x = "x", y = "y", title = "Regel 2") + xlim(-5,5) + ylim(-5,5) + geom_vline(xintercept = 0, col = "red") + geom_hline(yintercept = 0, col = "red")
# Henter tall fra Excel for regel 3:
<- read.xlsx("Funnel_blankedata_regel3.xlsx", 1)
regel3
<- ggplot(regel3, aes(x = Regel_3_x, y = Regel_3_y)) +
regel3_plot geom_point(size = 1) + labs(x = "x", y = "y", title = "Regel 3") + xlim(-5,5) + ylim(-5,5)+ geom_vline(xintercept = 0, col = "red") + geom_hline(yintercept = 0, col = "red")
# Henter tall fra Excel for regel 4:
<- read.xlsx("Funnel_blankedata_regel4.xlsx", 1)
regel4
<- ggplot(regel4, aes(x = Regel_4_x, y = Regel_4_y)) +
regel4_plot geom_point(size = 1) + labs(x = "x", y = "y", title = "Regel 4") + xlim(-5,5) + ylim(-5,5)+ geom_vline(xintercept = 0, col = "red") + geom_hline(yintercept = 0, col = "red")
# Setter plottene sammen:
ggarrange(regel1_plot, regel2_plot, regel3_plot, regel4_plot
+ rremove("x.text"), ncol = 2, nrow = 2)
Lage fig 3.2:
<- read.xlsx("Funnel_blankedata_regel3_2.xlsx", 1)
regel3_2
<- ggplot(regel3_2, aes(x = x, y = y)) +
regel3_2_plot geom_point(size = 1) + labs(x = "x", y = "y", title = "Regel 3 - annet førstetreff") + xlim(-5,5) + ylim(-5,5)+ geom_vline(xintercept = 0, col = "red") + geom_hline(yintercept = 0, col = "red")
regel3_2_plot
Lage fig 3.3:
<- read.xlsx("Funnel_blankedata_regel4_2.xlsx", 1)
regel4_2
<- ggplot(regel4_2, aes(x = x, y = y)) +
regel4_2_plot geom_point(size = 1) + labs(x = "x", y = "y", title = "Regel 4 - annet førstetreff") + xlim(-2,10) + ylim(-10,2)+ geom_vline(xintercept = 0, col = "red") + geom_hline(yintercept = 0, col = "red")
regel4_2_plot
Lage fig 3.4:
::p_load(qicharts2)
pacman
<- regel1 %>% pull(2)
regel1x
<- regel1 %>% pull(3)
regel1y
<- qic(regel1x, title = 'x-verdi ved regel 1', ylab = "verdi", xlab = "Forsøk nr.")
regel1xrun
<- qic(regel1y, title = 'y-verdi ved regel 1', ylab = "verdi", xlab = "Forsøk nr.")
regel1yrun
ggarrange(regel1xrun, regel1yrun + rremove("x.text"), ncol = 2, nrow = 1, widths = c(1, 1))
Lage fig 3.5:
::p_load(qicharts2)
pacman
<- regel2 %>% pull(2)
regel2x
<- regel2 %>% pull(3)
regel2y
<- qic(regel2x, title = 'x-verdi ved regel 2', ylab = "verdi", xlab = "Forsøk nr.")
regel2xrun
<- qic(regel2y, title = 'y-verdi ved regel 2', ylab = "verdi", xlab = "Forsøk nr.")
regel2yrun
ggarrange(regel2xrun, regel2yrun + rremove("x.text"), ncol = 2, nrow = 1, widths = c(1, 1))
Lage fig 3.6:
::p_load(qicharts2)
pacman
<- regel3 %>% pull(2)
regel3x
<- regel3 %>% pull(3)
regel3y
<- qic(regel3x, title = 'x-verdi ved regel 3', ylab = "verdi", xlab = "Forsøk nr.")
regel3xrun
<- qic(regel3y, title = 'y-verdi ved regel 3', ylab = "verdi", xlab = "Forsøk nr.")
regel3yrun
ggarrange(regel3xrun, regel3yrun + rremove("x.text"), ncol = 2, nrow = 1, widths = c(1, 1))
Lage fig 3.7:
::p_load(qicharts2)
pacman
<- regel4 %>% pull(2)
regel4x
<- regel4 %>% pull(3)
regel4y
<- qic(regel4x, title = 'x-verdi ved regel 4', ylab = "verdi", xlab = "Forsøk nr.")
regel4xrun
<- qic(regel4y, title = 'y-verdi ved regel 4', ylab = "verdi", xlab = "Forsøk nr.")
regel4yrun
ggarrange(regel4xrun, regel4yrun + rremove("x.text"), ncol = 2, nrow = 1, widths = c(2, 2))
Lage fig 3.8:
::p_load(xlsx, qicharts2, tidyverse, ggplot2, ggpubr)
pacman
<- read.xlsx("toteam.xlsx", 1)
toteam1
<- qic(Team.1,
team1 data = toteam1,
chart = 'i',
show.grid = TRUE,
title = "Team 1",
ylab = "Antall defekter pr uke",
xlab = "Uke #")
<- qic(Team.2,
team2 data = toteam1,
chart = 'i',
show.grid = TRUE,
title = "Team 2",
ylab = "Antall defekter pr uke",
xlab = "Uke #")
ggarrange(team1, team2 + rremove("x.text"), ncol = 2, nrow = 1, widths = c(1, 1))
Lage fig 3.9:
::p_load(xlsx, qicharts2, tidyverse, ggplot2, ggpubr)
pacman
<- readxl::read_excel("toteam.xlsx") %>%
toteam2 pivot_longer(c("Team 1", "Team 2"))
qic(Nr, value,
data = toteam2,
facets = ~name,
xlab = "Uke #",
ylab = "Antall defekter pr uke",
title = "To team - like prosesser - ulik variasjon")
Kapittel 4
Lage fig 4.1:
::p_load(tidyverse)
pacman
set.seed(30)
= rnorm(100, 179, 16)
x hist(x, xlab = "Høyde", ylab = "Antall", main = "Histogram for genererte høydedata")
Lage fig 4.2:
<- sample(165:175, 50, replace=TRUE)
data1 <- sample(170:180, 30, replace=TRUE)
data2 <- sample(180:185, 15, replace = TRUE)
data3 <- sample(185:190, 5, replace = TRUE)
data4
<- c(data1, data2, data3, data4)
data
hist(data, xlab = "Høyde", ylab = "Antall", main = "Histogram for genererte høydedata")
Lage fig 4.3:
::p_load(ggplot2, readxl, tidyverse, ggfortify)
pacman
set.seed(100)
<- rnorm(100, mean = 0, sd = 1)
normalfordeling
hist(normalfordeling,
main = "Genererte, normalfordelte data",
xlab = "x",
ylab = "f(x)",
border = "black",
col = "gray",
xlim = c(-4,4),
ylim = c(0,0.5),
las = 1,
probability = TRUE)
Lage fig 4.4:
set.seed(100)
<- rnorm(100, mean = 0, sd = 1)
normalfordeling
hist(normalfordeling,
main = "Genererte, normalfordelte data",
xlab = "x",
ylab = "f(x)",
border = "black",
col = "gray",
xlim = c(-4,4),
ylim = c(0,0.5),
las = 1,
probability = TRUE)
<- mean(normalfordeling)
m <- sqrt(var(normalfordeling))
std
curve(dnorm(x, mean = m, sd = std),
col="darkblue", lwd = 3, add = TRUE, yaxt = "n")
Lage fig 4.5:
::p_load(ggplot2, readxl, tidyverse, ggfortify)
pacman
ggplot(data.frame(x = c(-4, 4)), aes(x)) +
geom_function(fun = dnorm, colour = "darkblue", size = 1.5) +
theme_classic() +
scale_y_continuous(limits = c(0, 0.5), breaks = seq(0, 0.5, by = 0.1))
Lage fig 4.6:
<- seq(-4, 4, length=200)
x <- dnorm(x)
y
plot(x, y, type="l", lty=1, lwd = 2, col = "red", xlab="x",
ylab="f(x)")
<- seq(-1,1,length=100)
x <- dnorm(x)
y
polygon(c(-1,x,1),c(0,y,0),col="lightblue")
abline(v=-1, col="green", lwd = 2)
text(1.3, 0.38, "1 SD", col = "black")
text(-1.35, 0.38, "-1 SD", col = "black")
abline(v=1, col="green", lwd = 2)
text(0, 0.2, "68 %", col = "black")
Lage fig 4.7:
<- seq(-4,4,length=200)
x <- dnorm(x)
y
plot(x, y, type="l", lty=1, lwd = 2, col = "red", xlab="x",
ylab="f(x)")
<- seq(-2,2,length=200)
x <- dnorm(x)
y
polygon(c(-2,x,2),c(0,y,0),col="lightblue")
abline(v=-2, col="green", lwd = 2)
text(2.3, 0.38, "2 SD", col = "black")
text(-2.35, 0.38, "-2 SD", col = "black")
abline(v=2, col="green", lwd = 2)
text(0, 0.2, "95 %", col = "black")
Utregning av areal mellom to x-verdier i normalfordeling:
pnorm(2,mean=0,sd=1)-pnorm(-2,mean=0,sd=1)
Lage fig 4.8:
<- seq(-4,4,length=200)
x <- dnorm(x)
y
plot(x,y,type="l",lwd=2,col="red", xlab="x",
ylab="f(x)")
<- seq(-3,3,length=200)
x <- dnorm(x)
y
polygon(c(-3,x,3),c(0,y,0),col="lightblue")
abline(v=-3, col="green", lwd = 2)
text(3.3, 0.38, "3 SD", col = "black")
text(-3.35, 0.38, "-3 SD", col = "black")
abline(v=3, col="green", lwd = 2)
text(0, 0.2, "99.7 %", col = "black")
Lage fig 4.9:
<- rnorm(n= 100000, mean = 0, sd = 1)
y.norm <- hist(y.norm, breaks = 100, plot = F)
h <- cut(h$breaks, c(-Inf,-3,-2,-1,1,2,3,Inf), right = F) # right=False; sets intervals to be open on the right closed on the left
cuts plot(h,
col = rep(c("white", "4","3","2","3","4", "white"))[cuts],
main = 'Normalfordeling',
xlab = '',
freq = F,
ylim = c(0,0.6))
= 3
lwd # horzintal lines
lines(x = c(2,-2), y = c(0.48,0.48), type = "l", col=3, lwd = lwd)
lines(x = c(3,-3), y = c(0.55,0.55), type = "l", col=4, lwd = lwd)
lines(x = c(1,-1), y = c(0.41,0.41), type = "l", col=2, lwd = lwd)
# vertical lines
lines(x = c(1,1), y = c(0,0.41), type = "l", col=2, lwd = lwd)
lines(x = c(-1,-1), y = c(0,0.41), type = "l", col=2, lwd = lwd)
lines(x = c(2,2), y = c(0,0.48), type = "l", col=3, lwd = lwd)
lines(x = c(-2,-2), y = c(0,0.48), type = "l", col=3, lwd = lwd)
lines(x = c(3,3), y = c(0,0.55), type = "l", col=4, lwd = lwd)
lines(x = c(-3,-3), y = c(0,0.55), type = "l", col=4, lwd = lwd)
# text
text(0, 0.44, "68%", cex = 1.5, col=2)
text(0, 0.51, "95%", cex = 1.5, col=3)
text(0, 0.58, "99.7%", cex = 1.5, col=4)
Lage fig 4.10:
<- 0:20
success
plot(success,dbinom(success,size=20,prob=.5),
type='h',
main="Binomial distribusjon (n=20, p=0.5)",
ylab="Sannsynlighet",
xlab = "Suksess",
lwd=10)
Lage fig 4.11:
<- 0:20
success
plot(success,dbinom(success,size=20,prob=.2),
type='h',
main="Binomial distribusjon (n=20, p=0.2)",
ylab="Sannsynlighet",
xlab = "Suksess",
lwd=10)
Lage fig 4.12:
set.seed(32)
<- sample(1:6, 10, replace = TRUE)
terning10
stripchart(terning10, method = "stack", offset = .5, at = 0, pch = 19,
col = "steelblue", main = "10 terningkast", xlab = "Verdi på terning", ylab = "Antall"))
Lage fig 4.13:
set.seed(33)
<- sample(1:6, 100, replace = TRUE)
terning10
stripchart(terning10, method = "stack", offset = .5, at = 0, pch = 19,
col = "steelblue", main = "100 terningkast", xlab = "Verdi på terning", ylab = "Antall")
Lage simulering av terningkast:
set.seed(43)
<- sample(1:6, 600, replace = TRUE)
terning_runde1 table(terning_runde1)
set.seed(44)
<- sample(1:6, 600, replace = TRUE)
terning_runde2 table(terning_runde2)
set.seed(45)
<- sample(1:6, 600, replace = TRUE)
terning_runde3 table(terning_runde3)
Lage fig 4.14:
set.seed(46)
<- sample(1:6, 6000000, replace = TRUE)
minterning
options(scipen=999)
hist(minterning,
main="Histogram for 6 000 000 terningkast",
ylab="Antall",
xlab = "Verdi på terning")
Lage fig 4.15:
# Grid of X-axis values
<- 0:50
x
#-----------
# lambda: 5
#-----------
<- 5
lambda plot(dpois(x, lambda), type = "h", lwd = 2,
main = "Poisson sannsynlighetsfordeling",
ylab = "P(X = x)", xlab = "Antall hendelser")
#-----------
# lambda: 10
#-----------
<- 10
lambda lines(dpois(x, lambda), type = "h", lwd = 2, col = rgb(1,0,0, 0.7))
#-----------
# lambda: 20
#-----------
<- 20
lambda lines(dpois(x, lambda), type = "h", lwd = 2, col = rgb(0, 1, 0, 0.7))
# Legend
legend("topright", legend = c("5", "10", "20"),
title = expression(lambda), title.adj = 0.75,
lty = 1, col = 1:3, lwd = 2, box.lty = 0)
Lage fig 4.16:
<- 0:10
maal
plot(maal, dpois(maal, lambda=2.5),
type='h',
main='Poissonfordeling (lambda = 2.5)',
ylab='Sannsynlighet',
xlab ='# Mål',
lwd=3)
Regne ut sannsynligheter:
# Sannsynligheten for 0 mål
dpois(x = 0, lambda = 2.5)
# Sannsynligheten for 1 mål
dpois(x = 1, lambda = 2.5)
# Sannsynligheten for 2 mål
dpois(x = 2, lambda = 2.5)
# Sannsynligheten for 3 mål
dpois(x = 3, lambda = 2.5)
# Sannsynligheten for 4 mål
dpois(x = 4, lambda = 2.5)
# sannsynligheten for mellom 1 og 3 mål:
dpois(x = 1, lambda = 2.5) + dpois(x=2, lambda = 2.5) + dpois(x=3, lambda = 2.5)
Lage fig 4.17:
<- seq(1, 20, by = 1)
x_dgeom
<- dgeom(x_dgeom, prob = 0.4)
y_dgeom
plot(y_dgeom,
type="l",
main="Geometrisk fordeling for p = 0.4",
ylab="f(x)",
xlab = "x")
Lage fig 4.18:
::p_load(ggpubr)
pacman
<- seq(0, 20, length.out=1000)
eksford
<- data.frame(x=eksford, fx=dexp(eksford, rate=0.2)) %>%
dat1 add_column(ID = 1:1000) %>%
relocate(3)
<- data.frame(x=eksford, fx=dexp(eksford, rate=1)) %>%
dat2 add_column(ID = 1:1000) %>%
relocate(3)
<- data.frame(x=eksford, fx=dexp(eksford, rate=1.5)) %>%
dat3 add_column(ID = 1:1000) %>%
relocate(3)
<- data.frame(x=eksford, fx=dexp(eksford, rate=2)) %>%
dat4 add_column(ID = 1:1000) %>%
relocate(3)
<- ggplot(dat1, aes(x=x, y=fx)) + geom_line() + ggtitle(expression( ~ lambda ~ " = 0.2"))
dat1plot <- ggplot(dat2, aes(x=x, y=fx)) + geom_line() + ggtitle(expression( ~ lambda ~ " = 1.0"))
dat2plot <- ggplot(dat3, aes(x=x, y=fx)) + geom_line() + ggtitle(expression( ~ lambda ~ " = 1.5"))
dat3plot <- ggplot(dat4, aes(x=x, y=fx)) + geom_line() + ggtitle(expression( ~ lambda ~ " = 2.0"))
dat4plot
ggarrange(dat1plot, dat2plot, dat3plot, dat4plot + rremove("x.text"), ncol = 2, nrow = 2, widths = c(1, 1))
Kapittel 5
Lage fig 5.1:
::p_load(tidyverse, ggplot2, writexl, ggpubr)
pacman
# Lage normalfordelt datasett
set.seed(89)
<- rnorm(10000, mean=90, sd=5)
qqnorm
<- as_tibble(qqnorm)
qqnorm
# Plotte Q-Q plott
ggqqplot(qqnorm$value) + ggtitle("Normal Q-Q plott") + labs(x = "Teoretisk forventning", y = "Data")
Lage fig 5.2:
::p_load(gridExtra, writexl, ggplot2, tidyverse, ggpubr)
pacman
# Lage datasett med right skew
<- 5000
N <- rnbinom(N, 10, .1)
qqrightskew
<- as_tibble(qqrightskew)
qqrightskew
# Plotte histogram og Q-Q plott
<- ggplot(qqrightskew, aes(x=value)) + geom_histogram(color="black", fill="lightblue")
qqrighthist
<- ggqqplot(qqrightskew$value) + ggtitle("Normal Q-Q plott - skjevhet høyre") + labs(x = "Teoretisk forventning", y = "Data")
qqrightskew_plott
grid.arrange(qqrighthist, qqrightskew_plott, ncol=2)
Lage fig 5.3:
::p_load(gridExtra, writexl, ggplot2, tidyverse, ggpubr)
pacman
# Lage datasett med left skew
set.seed(91)
=5000
N<- rbeta(N,2,0.5,ncp=2)
qqleftskew
<- as_tibble(qqleftskew)
qqleftskew
# Plotte histogram og Q-Q plott"
<- ggplot(qqleftskew, aes(x=value)) + geom_histogram(color="black", fill="lightblue")
qqlefthist
<- ggplot(qqleftskew, aes(x=value)) + geom_histogram(color="black", fill="lightblue")
qqlefthist
<- ggqqplot(qqleftskew$value) + ggtitle("Normal Q-Q plott - skjevhet venstre") + labs(x = "Teoretisk forventning", y = "Data")
qqleftskew_plott
grid.arrange(qqlefthist, qqleftskew_plott, ncol=2)
Lage fig 5.4:
::p_load(gridExtra, writexl, ggplot2, tidyverse, ggpubr)
pacman
# Lage datasett med fet hale
set.seed(14)
=100
N
<- as_tibble(rcauchy(N, scale = 5))
qqcauchy
# Plotte histogram og Q-Q plott
<- ggplot(qqcauchy, aes(x=value)) + geom_histogram(color="black", fill="lightblue")
qqcauchyhist
<- ggplot(qqcauchy, aes(x=value)) + geom_histogram(color="black", fill="lightblue")
qqcauchyhist
<- ggqqplot(qqcauchy$value) + ggtitle("Normal Q-Q plott - tung hale") + labs(x = "Teoretisk forventning", y = "Data")
qqcauchy_plott
grid.arrange(qqcauchyhist, qqcauchy_plott, ncol=2)
Lage fig 5.5:
::p_load(gridExtra, writexl, ggplot2, tidyverse, ggpubr)
pacman
# Lage datasett med tynn hale
set.seed(81)
<- runif(n = 1000, min = -1, max = 1)
qqlt
<- as_tibble(qqlt)
qqlt
# Plotte histogram og Q-Q plott
<- ggplot(qqlt, aes(x=value)) + geom_histogram(color="black", fill="lightblue")
qqlthist
<- ggqqplot(qqlt$value) + ggtitle("Normal Q-Q plott - lett hale") + labs(x = "Teoretisk forventning", y = "Data")
qqlt_plott
grid.arrange(qqlthist, qqlt_plott, ncol=2)
Lage fig 5.6:
::p_load(gridExtra, writexl, ggplot2, tidyverse, ggpubr)
pacman
# Lage bimodialt datasett
set.seed(10)
<- rnorm(50,2,1)
mode1 <- mode1[mode1 > 0]
mode1 <- rnorm(50,6,1)
mode2 <- mode2[mode2 > 0]
mode2 <- as_tibble(sort(c(mode1,mode2)))
qqbimod
# Plotte histogram og Q-Q plott
<- ggplot(qqbimod, aes(x=value)) + geom_histogram(color="black", fill="lightblue")
qqbimodhist
<- ggplot(qqbimod, aes(sample = value)) +
qqbimod_plott stat_qq() +
stat_qq_line() +
ggtitle(" Normal Q-Q plott - bimodial") + labs(x = "Teoretisk forventning", y = "Data")
grid.arrange(qqbimodhist, qqbimod_plott, ncol=2)
Kjøre Anderson-Darling test for normalitet:
::p_load(nortest, readxl, tidyverse)
pacman
<- as_tibble(read_excel("Anderson-Darling_raw.xlsx"))
addata
ad.test(addata$Values)
Lage fig 5.7:
::p_load(gridExtra, writexl, ggplot2, tidyverse, readxl)
pacman
<- as_tibble(read_excel("Anderson-Darling_raw.xlsx"))
addata2
ggplot(addata2, aes(sample = Values)) +
stat_qq() +
stat_qq_line() +
ggtitle(" Normal Q-Q plott - A-D data") + labs(x = "Teoretisk forventning", y = "Data")
Kjøre Anderson-Darling test for normalitet på normalfordelte data:
::p_load(nortest, readxl, tidyverse)
pacman
<- as_tibble(read_excel("QQ_norm.xlsx"))
addata3
ad.test(addata3$value)
Kjøre statistiske tester:
::p_load(nortest, readxl, tidyverse, tseries)
pacman
options(scipen=999)
<- as_tibble(read_excel("Anderson-Darling_raw.xlsx"))
addata5
ks.test(addata5, "pnorm")
shapiro.test(addata5$Values)
cvm.test(addata$Values)
Jarque-Bera test:
<- as_tibble(read_excel("Anderson-Darling_raw.xlsx"))
addata6
::p_load(tseries, normtest)
pacman
jarque.bera.test(addata6$Values)
ajb.norm.test(addata6$Values, nrepl=2000)
Lage fig 5.8:
::p_load(ggpubr, tidyverse)
pacman
<- as_tibble(read_excel("Anderson-Darling_raw.xlsx"))
addata7
ggqqplot(addata7$Values)
Kapittel 6
Lage fig 6.1:
::p_load(qicharts2, ggplot2)
pacman
set.seed(21)
<- rnorm(24, 16)
eksempelrun
<- qic(eksempelrun, title = "Eksempel på seriediagram - normalfordelte genererte tall", ylab = "Verdi", xlab = "Hendelse", method = "anhoej")
serie1
+ geom_point(size = 2) serie1
Lage fig 6.2:
::p_load(qicharts2, ggplot2)
pacman
set.seed(43)
13:24] <- rpois(12, 24)
eksempelrun[
<- qic(eksempelrun, title = "Eksempel på seriediagram - modifiserte genererte tall", ylab = "Verdi", xlab = "Hendelse", method = "anhoej")
serie2
+ geom_point(size = 2) serie2
Lage fig 6.3:
::p_load(qicharts2, ggplot2)
pacman
<- qic(eksempelrun, title = "Eksempel på seriediagram - endring i prosess", ylab = "Verdi", xlab = "Hendelse", method = "anhoej", part = 14)
serie3
+ geom_point(size = 2) serie3
Kapittel 7
Lage fig 7.1:
::p_load(qicharts2, tidyverse)
pacman
set.seed(81)
<- (rnorm(24))
kontr_eks
<- qic(kontr_eks, chart = 'i', title = "Eksempel på kontrolldiagram", subtitle = "Tilfeldig genererte tall", ylab = "Verdi", xlab = "Hendelse")
kontr1
+ geom_point(size = 2) kontr1
Lage fig 7.2:
::p_load(qcc, tidyverse)
pacman
set.seed(81)
<- (rnorm(24))
kontr_eks
<- as_tibble(kontr_eks)
kontr_eks
<- qcc(kontr_eks, type = "xbar.one", nsigmas = 3)
kontr2
plot(kontr2, title = "Eksempel på kontrolldiagram - Tilfeldig genererte tall", ylab = "Verdi", xlab = "Hendelse")
Lage fig 7.3:
::p_load(qcc, tidyverse)
pacman
set.seed(64)
<- rnorm(15,2,1)
mode1x <- mode1x[mode1x > 0]
mode1x <- rnorm(15,3,2)
mode2x <- mode2x[mode2x > 0]
mode2x <- (sort(c(mode1x,mode2x)))
kontr_eks2
<- as_tibble(kontr_eks2)
kontr_eks2
<- qcc(kontr_eks2, type = "xbar.one", nsigmas = 3)
qq
plot(qq, title = "Eksempel på kontrolldiagram - Tilfeldig genererte tall", ylab = "Verdi", xlab = "Hendelse")
Lage fig 7.4:
::p_load(qcc, tidyverse, readxl)
pacman
<- as_tibble(read_excel("P_chart.xlsx"))
pdiagr
<- with(pdiagr, qcc(pdiagr$Keisersnitt, pdiagr$Antall_fødsler, type = "p"))
p_chart
plot(p_chart, title = "p-diagram: Andel keisersnitt", xlab = "Måned", ylab = "Andel")
Lage fig 7.5:
::p_load(qcc, tidyverse, readxl)
pacman
<- as_tibble(read_excel("Laneyp.xlsx"))
Lpdiagr
<- with(Lpdiagr, qcc(Lpdiagr$Pr_telefon, Lpdiagr$Medlemmer, type = "p"))
Lp_chart
plot(Lp_chart, title = "p-diagram: Andel kommunisert pr telefon", xlab = "Måned", ylab = "Andel")
Lage fig 7.6:
::p_load(qcc, tidyverse, readxl)
pacman
<- as_tibble(read_excel("np_diagram.xlsx"))
npdiagr
<- with(npdiagr, qcc(npdiagr$Feil, npdiagr$n, type = "np"))
np_chart
plot(np_chart, title = "np-diagram: Andel feil", xlab = "Uke", ylab = "Andel")
Lage fig 7.7:
::p_load(qcc, tidyverse, readxl)
pacman
<- as_tibble(read_excel("u_diagram.xlsx"))
udiagr
<- with(udiagr, qcc(udiagr$Pasientfall, udiagr$Pasientdager, type = "u"))
u_chart
plot(u_chart, title = "u-diagram: Andel feil", xlab = "Uke", ylab = "Pasientfall")
Lage fig 7.8:
::p_load(qcc, tidyverse, readxl)
pacman
<- as_tibble(read_excel("cdiagram.xlsx"))
cdiagr
<- with(cdiagr, qcc(cdiagr$Feilmedisinering, type = "c"))
c_chart
plot(c_chart, title = "c-diagram", xlab = "Måned", ylab = "Feilmedisinering")
Lage fig 7.9:
::p_load(qcc, tidyverse, readxl)
pacman
<- as_tibble(read_excel("imr_diagram.xlsx"))
imrdiagr
<- with(imrdiagr, qcc(imrdiagr$Fødselsvekt, type = "xbar.one"))
imr_chart
plot(imr_chart, title = "IMR-diagram", xlab = "Baby nr", ylab = "Fødselsvekt")
Lage fig 7.10:
::p_load(qicharts2, tidyverse)
pacman
set.seed(43)
<- seq(as.Date('2020-1-1'), as.Date('2020-12-31'), by = 'day')
datoer <- sort(sample(datoer, 24))
hendelser
<- c(NA, diff(hendelser))
t_diagram_data
<- qic(t_diagram_data,
t_diagram chart = "t",
title = "T-diagram",
xlab = "Hendelse nr.",
ylab = "Dager")
+ geom_point() t_diagram
Vedlegg 2
::p_load(kableExtra)
pacman
<- 10:100
n <- data.frame(
hendelser "Antall observasjoner" = n,
"Øvre grense for serie" = round(log2(n) + 3),
"Nedre grense for antall krysninger" = qbinom(0.05, n - 1, 0.5),
check.names = FALSE)
kbl(hendelser, booktabs = T, longtable = T, caption = "Kritiske verdier", align = 'c') %>%
kable_styling(latex_options = "striped") %>%
kable_styling(latex_options = "repeat_header") %>%
row_spec(0, bold = T)
Vedlegg 3
Lage generisk grafisk framstilling av Chebyshevs teorem
::p_load(tidyverse)
pacman
# For å lage eksempelet lager vi to normalfordelte datasett med ulik gjennomsnitt og standardavvik som vi slår sammen
set.seed(10)
<- rnorm(1000,2,1)
mode1 <- mode1[mode1 > 0]
mode1 <- rnorm(1000,6,2)
mode2 <- mode2[mode2 > 0]
mode2 <- as_tibble(sort(c(mode1,mode2)))
modex2
# Deretter setter vi dataene inn i et diagram
ggplot(modex2, aes(x=value)) +
geom_density() +
ggtitle("Eksempel: Bimodal distribusjon", subtitle = "Eksemplet er kun illustrativt, ikke nøyaktig eller basert på reelle data") +
labs(x = "", y = "") +
theme_classic() +
theme(legend.position = "none") +
scale_x_discrete(labels = NULL, breaks = NULL) +
labs(x = "") +
xlim(-0.02, 8) +
ylim(-0.02,0.3) +
annotate("segment", x = 4, y = 0, xend = 4, yend = 0.15, linetype = "dashed", color = "red") +
annotate('text', x = 4, y = -0.015, label = "bar(x)", parse = TRUE, size = 5) +
annotate("segment", x = 7.8, y = 0, xend = 7.8, yend = 0.275, color = "darkgreen") +
annotate('text', x = 7.8, y = -0.015, label = "bar(x) ~ + ~ 3 ~ sd", parse = TRUE, size = 5) +
annotate("segment", x = 0.2, y = 0, xend = 0.2, yend = 0.275, color = "darkgreen") +
annotate('text', x = 0.3, y = -0.015, label = "bar(x)~-~3~sd", parse = TRUE, size = 5) +
annotate("segment", x = 2.1, y = 0, xend = 2.1, yend = 0.235, color = "blue") +
annotate('text', x = 2.1, y = -0.015, label = "bar(x)~-~2~sd", parse = TRUE, size = 5) +
annotate("segment", x = 5.9, y = 0, xend = 5.9, yend = 0.235, color = "blue") +
annotate('text', x = 5.9, y = -0.015, label = "bar(x)~+~2~sd", parse = TRUE, size = 5) +
annotate("text", x=4, y=0.23, label="Minst 75 %", color = "blue") +
annotate("segment", x = 3.35, y = 0.23, xend = 2.25, yend = 0.23, color = "blue") +
annotate("segment", x = 4.6, y = 0.23, xend = 5.8, yend = 0.23, color = "blue") +
annotate("text", x=4, y=0.27, label="Minst 88.89 %", color = "darkgreen") +
annotate("segment",x = 0.3, y = 0.27, xend = 3.2, yend = 0.27, color = "darkgreen") +
annotate("segment", x = 4.8, y = 0.27, xend = 7.7, yend = 0.27, color = "darkgreen")
Lage tabell over k og prosent
::p_load(tidyverse, kableExtra, knitr)
pacman<- seq(1,4,by = 0.1)
k <- 1-(1/k^2)
auc <- round(auc*100)
auc.percent <- as_tibble(cbind(k,auc.percent))
cheb_table
kbl(cheb_table) %>%
kable_styling(bootstrap_options = "striped", full_width = F) %>%
kable_paper() %>%
scroll_box(height = "200px")
Lage graf over k og prosent
plot(k,
auc.percent, col = 'blue',
pch = 10,
xlab = 'k',
ylab = 'Prosent',
main = 'Chebyshevs teorem' )
abline(v=2, col="red", lwd = 1)
abline(h=75, col="red", lwd = 1)
Vedlegg 4
Lage ikke-normalfordelte data og plotte Q-Q plott
::p_load(ggplot2, tidyverse, magrittr, dplyr, truncnorm)
pacman
<- 1e4
nn set.seed(1)
<- as_tibble(c(rtruncnorm(nn/2, a=1, b=5, mean=2, sd=.5),
sims rtruncnorm(nn/2, a=1, b=5, mean=4, sd=.5)))
ggplot(sims, aes(sample=value)) + stat_qq() + stat_qq_line(col = "red")
Lage første histogram (populasjonen)
::p_load(ggplot2, tidyverse, magrittr, dplyr, truncnorm)
pacman
<- 1e4
nn set.seed(1)
<- c(rtruncnorm(nn/2, a=1, b=5, mean=2, sd=.5),
sims rtruncnorm(nn/2, a=1, b=5, mean=4, sd=.5))
<- as.character(abs(as.integer((sims - mean(sims)) / sd(sims))))
mySD <- data.frame(sims, mySD)
myDF <- as.integer(max(abs(sims)))
xAxis <- round(mean(sims),2)
mu <- round(sd(sims),2)
sd <- sd/10
myBin ggplot(myDF, aes(sims)) +
geom_histogram(aes(fill = mySD), binwidth = myBin, col="black", size=.1) + # change binwidth
labs(x="x", y="Frequency") +
labs(title="Histogram av generert bimodal distribusjon",subtitle=paste0( "Gj.snitt = ", mu, ", sd = ", sd)) +
scale_x_continuous(breaks = seq(mu-sd*5, mu+sd*5, sd)) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
guides(fill=guide_legend(expression(sigma)))
Lage histogram over 100 utvalg av 30
<- 100
n <- 30
sampSize <- rep(NA, n)
xbar for (i in 1:n) {
<- sample(sims, size = sampSize)
mysamp <- mean(mysamp)
xbar[i]
}
<-as.character( abs(as.integer((xbar - mean(xbar)) / sd(xbar) )))
mySD<-data.frame(xbar,mySD)
myDF<-as.integer(max(abs(xbar)))
xAxis<-round(mean(xbar),2)
mu<-round(sd(xbar),2)
sd<-sd/10
myBinggplot(myDF, aes(xbar)) +
geom_histogram(aes(fill = mySD), binwidth = myBin, col="black", size=.1) + # change binwidth
labs(x="x", y="Frequency") +
labs(title="Histogram for genererte utvalg",
subtitle=paste0( "mean = ", mu, ", sd = ", sd, ", Utvalgsstørrelse = ",sampSize,", Antall utvalg = ",n))+
scale_x_continuous(breaks = seq(mu-sd*5, mu+sd*5, sd))+
theme_bw()+
theme(plot.title = element_text(hjust = 0.5))+
guides(fill=guide_legend(expression(sigma)))+
geom_density(aes(y=..count../90))
Lage histogram over 1000 utvalg av 30
<-1000
n<-30
sampSize<- rep(NA, n)
xbar for (i in 1:n) {
<- sample(sims, size = sampSize)
mysamp <- mean(mysamp)
xbar[i]
}
<-as.character( abs(as.integer((xbar - mean(xbar)) / sd(xbar) )))
mySD<-data.frame(xbar,mySD)
myDF<-as.integer(max(abs(xbar)))
xAxis<-round(mean(xbar),2)
mu<-round(sd(xbar),2)
sd<-sd/10
myBinggplot(myDF, aes(xbar)) +
geom_histogram(aes(fill = mySD), binwidth = myBin, col="black", size=.1) + # change binwidth
labs(x="x", y="Frequency") +
labs(title="Histogram for genererte utvalg",
subtitle=paste0( "Gj.snitt = ", mu, ", sd = ", sd, ", Utvalgsstørrelse = ",sampSize,", Antall utvalg = ",n))+
scale_x_continuous(breaks = seq(mu-sd*5, mu+sd*5, sd))+
theme_bw()+
theme(plot.title = element_text(hjust = 0.5))+
guides(fill=guide_legend(expression(sigma)))+
geom_density(aes(y=..count../90))
Lage histogram over 1000 utvalg av 30
<-10000
n<-30
sampSize<- rep(NA, n)
xbar for (i in 1:n) {
<- sample(sims, size = sampSize)
mysamp <- mean(mysamp)
xbar[i]
}
<-as.character( abs(as.integer((xbar - mean(xbar)) / sd(xbar) )))
mySD<-data.frame(xbar,mySD)
myDF<-as.integer(max(abs(xbar)))
xAxis<-round(mean(xbar),2)
mu<-round(sd(xbar),2)
sd<-sd/10
myBinggplot(myDF, aes(xbar)) +
geom_histogram(aes(fill = mySD), binwidth = myBin, col="black", size=.1) + # change binwidth
labs(x="x", y="Frequency") +
labs(title="Histogram for genererte utvalg",
subtitle=paste0( "Gj.snitt = ", mu, ", sd = ", sd, ", Utvalgsstørrelsee = ",sampSize,", Antall utvalg = ",n))+
scale_x_continuous(breaks = seq(mu-sd*5, mu+sd*5, sd))+
theme_bw()+
theme(plot.title = element_text(hjust = 0.5))+
guides(fill=guide_legend(expression(sigma)))+
geom_density(aes(y=..count../90))
Vedlegg 5
::p_load(flextable, SixSigma, officer, magrittr)
pacman
<- 25
nmax <- 2:nmax
n
<- sapply(2:nmax, ss.cc.getd2)
d2 <- sapply(2:nmax, ss.cc.getd3)
d3 <- sapply(2:nmax, ss.cc.getc4)
c4 <- 3/(d2*sqrt(n))
A2 <- sapply(1:(nmax-1), function(x){
D3 max(c(0, 1 - 3*(d3[x]/d2[x])))})
<- (1 + 3*(d3/d2))
D4 <- sapply(1:(nmax-1), function(x){
B3 max(0, 1 - 3*(sqrt(1-c4[x]^2)/c4[x]))})
<- 1 + 3*(sqrt(1-c4^2)/c4)
B4
<- data.frame(n, A2, d2, d3, c4,
constdf
D3, D4, B3, B4)
set_flextable_defaults(big.mark = " ",
font.size = 10,
theme_fun = theme_vanilla,
padding.bottom = 6,
padding.top = 6,
padding.left = 6,
padding.right = 6,
background.color = "#EFEFEF")
<- flextable(constdf)
constdf1
<- align(constdf1, align = "center", part = "all")
constdf1
<- add_header_lines(constdf1, values = "Tabell med konstanter for kontrolldiagram")
constdf1
constdf1