In this article, the "faithful" data set from the R package "datasets" is analyzed. The article contains the R code used, as well as the (German) commentary.
The data set is composed of the waiting time between eruptions and the duration of the eruption
for the "Old Faithful" geyser in Yellowstone National Park, Wyoming, USA. Download (R code).
# Knowledgedump.org - Analysis of the "Old Faithful Geyser" data set
# 1. Load the data
# The global environment is intentionally NOT cleared with rm(list = ls()):
# every object used below is created by this script, so wiping the workspace
# would only destroy unrelated objects of whoever sources the file.
library(datasets) # package that ships the "faithful" data set
# 2. Old Faithful geyser data
# First look at the raw data: leading rows, structure, summary, help page.
head(faithful, 10)
str(faithful)
summary(faithful)
help(faithful)

# 2x2 overview. Top row: the first 50 observations of each variable plotted
# against their index, with a smoother added by panel.smooth(). Bottom row:
# density-scaled histograms overlaid with a kernel density estimate.
par(mfrow = c(2, 2))
plot(faithful$eruptions[1:50], type = "l",
     main = "Eruptsionsdauer nach Index",
     xlab = "Index", ylab = "Eruptionsdauer in Minuten")
panel.smooth(1:50, faithful$eruptions[1:50])
plot(faithful$waiting[1:50], type = "l",
     main = "Wartezeiten nach Index",
     xlab = "Index", ylab = "Wartezeit in Minuten")
panel.smooth(1:50, faithful$waiting[1:50])
# freq = FALSE (spelled out; `F` is an ordinary, reassignable variable) puts
# the histogram on the density scale so the density curve is comparable.
hist(faithful$eruptions, main = "Verteilung von Eruptionsdauer",
     xlab = "Eruptionsdauer", ylab = "Dichte", freq = FALSE)
lines(density(faithful$eruptions), col = "red")
hist(faithful$waiting, main = "Verteilung von Wartezeiten",
     xlab = "Wartezeit", ylab = "Dichte", freq = FALSE)
lines(density(faithful$waiting), col = "red")
par(mfrow = c(1, 1))

# Frequency of each eruption duration after rounding to whole seconds.
f_sek <- table(round(faithful$eruptions * 60))
# names() of a table are character strings; convert them explicitly to the
# numeric durations instead of relying on implicit coercion inside plot().
plot(as.numeric(names(f_sek)), f_sek, type = "h",
     main = "Anzahl der vorkommenden Eruptsionszeiten",
     xlab = "Eruptionsdauer", ylab = "Anzahl der Vorkommnis")
# Durations (in seconds) that occur at least four times.
f_sek[f_sek >= 4]

# Scatter plot of eruption duration against waiting time with a smoother.
plot(faithful, main = "Eruptionsdauer gg. Wartezeit",
     xlab = "Eruptionsdauer in Minuten", ylab = "Wartezeit in Minuten")
panel.smooth(faithful$eruptions, faithful$waiting) # lowess() with defaults
# 3. Predicting the next eruption
# Simple linear regression of waiting time on eruption duration. The formula
# uses bare column names resolved through `data`, so the fitted model carries
# clean term names ("eruptions") and can be used with predict(newdata = ...).
lmfaithful <- lm(waiting ~ eruptions, data = faithful)
lmfaithful

# Scatter plot with the least-squares line (blue) and a hand-picked line with
# intercept 30 and slope 10 (red) for comparison.
plot(faithful, main = "Eruptionsdauer gg. Wartezeit",
     xlab = "Eruptionsdauer in Minuten", ylab = "Wartezeit in Minuten")
abline(lmfaithful, col = "blue")
abline(30, 10, col = "red")

# Predicted waiting times under both lines. pred1 uses the regression
# coefficients rounded to two decimals, so it approximates fitted(lmfaithful).
pred1 <- faithful$eruptions * 10.73 + 33.47
pred2 <- faithful$eruptions * 10 + 30
summary(pred1)
summary(pred2)
summary(faithful$waiting)

# Observations where the prediction exceeds the observed waiting time by more
# than one minute.
spaet1 <- subset(pred1 - faithful$waiting, pred1 - faithful$waiting > 1)
spaet2 <- subset(pred2 - faithful$waiting, pred2 - faithful$waiting > 1)
spaet1
spaet2
summary(lmfaithful)

# Standardized residuals against fitted values. rstandard() from the always
# attached stats package is used; stdres() lives in MASS, which this script
# never loads, so calling it would stop with "could not find function".
std_resid <- rstandard(lmfaithful)
plot(fitted(lmfaithful), std_resid,
     sub = "lm(waiting ~ eruptions)", main = "Std. Residuen vs Fit",
     xlab = "Fit", ylab = "Std. Residuen")
abline(h = 0, lty = 3, col = "gray")
panel.smooth(fitted(lmfaithful), std_resid)

# Sizes of the two groups obtained by splitting eruption duration at 3.5 min.
str(faithful$eruptions[faithful$eruptions >= 3.5])
str(faithful$eruptions[faithful$eruptions < 3.5])

# Leverage (hat values) by observation index; the index is derived from the
# data rather than hard-coded to the number of rows.
hat_vals <- hatvalues(lmfaithful)
plot(hat_vals, main = "Hebelgewicht vs Index",
     ylab = "Hebelgewicht", xlab = "Index")
panel.smooth(seq_along(hat_vals), hat_vals)

# Built-in lm diagnostics: plots 2 and 3 on their own pages, plots 4 and 5
# side by side.
plot(lmfaithful, which = 2)
plot(lmfaithful, which = 3)
par(mfrow = c(1, 2))
plot(lmfaithful, which = 4:5)
par(mfrow = c(1, 1)) # restore the single-panel layout for later plots
# 4. Alternative models
# Install quantreg only when it is missing. requireNamespace() merely checks
# availability (require() would attach the package as a side effect and is not
# meant as an install guard); library() below then attaches it or fails loudly.
if (!requireNamespace("quantreg", quietly = TRUE)) {
  install.packages("quantreg",
                   repos = "http://cran.us.r-project.org", dependencies = TRUE)
}
library(quantreg)

# Quantile regressions of waiting time on eruption duration for the 1%, 50%
# (median) and 99% quantiles.
rq1 <- rq(waiting ~ eruptions, data = faithful, tau = 0.01)
rq50 <- rq(waiting ~ eruptions, data = faithful, tau = 0.5)
rq99 <- rq(waiting ~ eruptions, data = faithful, tau = 0.99)

# Scatter plot with the least-squares line (blue), the two extreme quantile
# lines (red) and the median line (green).
plot(faithful, main = "Eruptionsdauer gg. Wartezeit",
     xlab = "Eruptionsdauer in Minuten", ylab = "Wartezeit in Minuten")
abline(lmfaithful, col = "blue")
abline(rq1, col = "red")
abline(rq99, col = "red")
abline(rq50, col = "green")

# Second plot: smoother plus separate least-squares fits for eruptions
# shorter than 4 minutes and for eruptions of 4 minutes or longer (green),
# compared with the single overall fit (blue).
plot(faithful, main = "Eruptionsdauer gg. Wartezeit",
     xlab = "Eruptionsdauer in Minuten", ylab = "Wartezeit in Minuten")
panel.smooth(faithful$eruptions, faithful$waiting) # lowess() with defaults
f1 <- subset(faithful, eruptions < 4)
f2 <- subset(faithful, eruptions >= 4)
lmf1 <- lm(waiting ~ eruptions, data = f1)
lmf2 <- lm(waiting ~ eruptions, data = f2)
abline(lmf1, col = "green")
abline(lmf2, col = "green")
abline(lmfaithful, col = "blue")
summary(lmf2)
summary(lmf1)