Importing the Munich Rent Index data:
source("import_data/munich_rent_index.R")
Adjust the model:
\[ \text{rent}_{i} = \beta_{0} + \beta_{1} \text{area}_{i} + \beta_{2} \text{yearco}_{i} + \beta_{3} \text{yearco2}_{i} + \beta_{4} \text{yearco3}_{i} + \epsilon_{i} \]
munich_rent_poly_yearc <- poly(munich_rent_index$yearc, 3)
munich_rent_index[, c("yearco", "yearco2", "yearco3")] <- munich_rent_poly_yearc
munich_regression <- lm(rent ~ area + yearco + yearco2 + yearco3, data = munich_rent_index)
summary(munich_regression)
Call:
lm(formula = rent ~ area + yearco + yearco2 + yearco3, data = munich_rent_index)
Residuals:
Min 1Q Median 3Q Max
-758.18 -89.06 -8.24 83.59 1039.08
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 113.3754 8.2222 13.789 <2e-16 ***
area 5.1364 0.1156 44.425 <2e-16 ***
yearco 3016.5227 150.2419 20.078 <2e-16 ***
yearco2 1747.8691 148.1177 11.801 <2e-16 ***
yearco3 -11.0154 146.0896 -0.075 0.94
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 146.1 on 3077 degrees of freedom
Multiple R-squared: 0.4433, Adjusted R-squared: 0.4425
F-statistic: 612.5 on 4 and 3077 DF, p-value: < 2.2e-16
And we visualize:
par(mfrow = c(3, 2))
area_seq <- seq(min(munich_rent_index$area), max(munich_rent_index$area), length.out = 100)
yearc_seq <- seq(min(munich_rent_index$yearc), max(munich_rent_index$yearc), length.out = 100)
munich_beta <- munich_regression$coefficients
area_effect <- function(area){
munich_beta[2] * area
}
yearc_effect <- function(yearc){
yearc_poly <- predict(munich_rent_poly_yearc, yearc)
munich_beta[3] * yearc_poly[, 1] + munich_beta[4] * yearc_poly[, 2] + munich_beta[5] * yearc_poly[, 3]
}
partial_area <- munich_rent_index$rent - munich_beta[1] - yearc_effect(munich_rent_index$yearc)
plot(
munich_rent_index$area,
partial_area,
main = 'effect of area incl. partial residuals',
xlab = 'area in sqm',
ylab = 'effect of area'
)
lines(area_seq, area_effect(area_seq), col = 'red', lwd = 2)
partial_year <- munich_rent_index$rent - munich_beta[1] - area_effect(munich_rent_index$area)
plot(
munich_rent_index$yearc,
partial_year,
main = 'effect of year of construction incl. partial residuals',
xlab = 'year of construction',
ylab = 'effect of year of construction'
)
lines(yearc_seq, yearc_effect(yearc_seq), col = 'red', lwd = 2)
studentized_residuals <- rstudent(munich_regression)
plot(
munich_regression$fitted.values,
studentized_residuals,
main = 'studentized residuals vs. estimated rent',
xlab = 'estimated rent',
ylab = 'studentized residuals'
)
plot(
munich_rent_index$area,
studentized_residuals,
main = 'studentized residuals vs. area',
xlab = 'area in sqm',
ylab = 'studentized residuals'
)
plot(
munich_rent_index$yearc,
studentized_residuals,
main = 'studentized residuals vs. year of construction',
xlab = 'year of construction',
ylab = 'studentized residuals'
)
Assuming the variance model:
\[ {\sigma_{i}}^{2} = \sigma_{i} \cdot h \left ( \alpha_{0} + \alpha_{1} \text{areao}_{i} + \alpha_{2} \text{areao2}_{i} + \alpha_{3} \text{areao3}_{i} + \alpha_{4} \text{yearco}_{i} + \alpha_{5} \text{yearco2}_{i} + \alpha_{6} \text{yearco3}_{i} \right ) \]
We add the polynomial of degree 3 of \(\text{area}\):
munich_rent_poly_area <- poly(munich_rent_index$area, 3)
munich_rent_index[, c("areao", "areao2", "areao3")] <- munich_rent_poly_area
We calculate the Breusch and Pagan Test:
\[ {\sigma_{i}}^{2} = \sigma_{i} \cdot h \left ( \alpha_{0} + \alpha_{1} {z}_{i1} + \cdots + \alpha_{q} {z}_{iq} \right ) \]
\[ \begin{matrix} H_{0}: \alpha_{1} = \cdots = \alpha_{q} = 0 & \text{against} & H_{1}: \alpha_{j} \neq 0 \text{ for at least one } j \end{matrix} \]
\[ g_{i} = \frac{{\hat{\epsilon_{i}}}^{2}}{{\hat{\sigma}}_{ML}^{2}} \]
\[ T = \frac{1}{2} \sum_{i=1}^{2} \left ( \hat{g}_{i} - \bar{g} \right )^{2} \]
We calculate using R package lmtest:
library(lmtest)
bptest(
munich_regression,
~ areao + areao2 + areao3 + yearco + yearco2 + yearco3,
data = munich_rent_index
)
studentized Breusch-Pagan test
data: munich_regression
BP = 558.7, df = 6, p-value < 2.2e-16
Using the model of Example 4.1:
summary(munich_regression)
Call:
lm(formula = rent ~ area + yearco + yearco2 + yearco3, data = munich_rent_index)
Residuals:
Min 1Q Median 3Q Max
-758.18 -89.06 -8.24 83.59 1039.08
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 113.3754 8.2222 13.789 <2e-16 ***
area 5.1364 0.1156 44.425 <2e-16 ***
yearco 3016.5227 150.2419 20.078 <2e-16 ***
yearco2 1747.8691 148.1177 11.801 <2e-16 ***
yearco3 -11.0154 146.0896 -0.075 0.94
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 146.1 on 3077 degrees of freedom
Multiple R-squared: 0.4433, Adjusted R-squared: 0.4425
F-statistic: 612.5 on 4 and 3077 DF, p-value: < 2.2e-16
Estimate the regression model of example
\[ \log({\hat{\epsilon}_{i}}^{2}) = \alpha_{0} + \alpha_{1} \text{areao}_{i} + \alpha_{2} \text{areao2}_{i} + \alpha_{3} \text{areao3}_{i} + \alpha_{4} \text{yearco}_{i} + \alpha_{5} \text{yearco2}_{i} + \alpha_{6} \text{yearco3}_{i} + \nu_{i} \]
par(mfrow = c(2, 2))
log_sq_residuals <- log(residuals(munich_regression)^2)
variance_model <- lm(
log_sq_residuals ~ areao + areao2 + areao3 + yearco + yearco2 + yearco3,
data = munich_rent_index
)
summary(variance_model)
Call:
lm(formula = log_sq_residuals ~ areao + areao2 + areao3 + yearco +
yearco2 + yearco3, data = munich_rent_index)
Residuals:
Min 1Q Median 3Q Max
-14.1548 -0.9571 0.5289 1.4981 4.2336
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.47901 0.03988 212.594 < 2e-16 ***
areao 34.91018 2.30869 15.121 < 2e-16 ***
areao2 -6.91661 2.22960 -3.102 0.00194 **
areao3 2.10026 2.21900 0.946 0.34397
yearco -13.85151 2.28931 -6.051 1.62e-09 ***
yearco2 6.72398 2.25083 2.987 0.00284 **
yearco3 11.08694 2.21764 4.999 6.07e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.214 on 3075 degrees of freedom
Multiple R-squared: 0.1125, Adjusted R-squared: 0.1108
F-statistic: 64.96 on 6 and 3075 DF, p-value: < 2.2e-16
plot(
munich_rent_index$area,
log_sq_residuals,
main = 'log. squared residuals vs. area',
xlab = 'log. squared residuals',
ylab = 'area in sqm'
)
plot(
munich_rent_index$yearc,
log_sq_residuals,
main = 'log. squared residuals vs. year of construction',
xlab = 'log. squared residuals',
ylab = 'year of construction'
)
variance_beta <- variance_model$coefficients
area_variance_effect <- function(area){
i_poly_area <- predict(munich_rent_poly_area, area)
variance_beta[2] * i_poly_area[, 1] + variance_beta[3] * i_poly_area[, 2] + variance_beta[4] * i_poly_area[, 3]
}
yearc_variance_effect <- function(yearc){
yearc_poly <- predict(munich_rent_poly_yearc, yearc)
variance_beta[5] * yearc_poly[, 1] + variance_beta[6] * yearc_poly[, 2] + variance_beta[7] * yearc_poly[, 3]
}
plot(
munich_rent_index$area,
log_sq_residuals - variance_beta[1] - yearc_variance_effect(munich_rent_index$yearc),
main = 'residual regression: effect of area',
xlab = 'effect of area',
ylab = 'area in sqm'
)
lines(area_seq, area_variance_effect(area_seq), col = 'red', lwd = 2)
plot(
munich_rent_index$yearc,
log_sq_residuals - variance_beta[1] - area_variance_effect(munich_rent_index$area),
main = 'residual regression: effect of year of construction',
xlab = 'effect of year of construction',
ylab = 'year of construction'
)
lines(yearc_seq, yearc_variance_effect(yearc_seq), col = 'red', lwd = 2)
We calculate the Weight:
weight <- 1 / exp(variance_model$fitted.values)
head(weight)
1 2 3 4 5 6
3.548163e-04 5.247798e-05 1.127534e-03 7.297107e-04 1.226543e-04 2.707725e-04
And now the adjustment:
weighted_linear_model <- lm(
rent ~ area + yearco + yearco2 + yearco3,
data = munich_rent_index,
weights = weight
)
summary(weighted_linear_model)
Call:
lm(formula = rent ~ area + yearco + yearco2 + yearco3, data = munich_rent_index,
weights = weight)
Weighted Residuals:
Min 1Q Median 3Q Max
-6.8645 -1.3168 -0.1547 1.3007 8.5157
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 124.3734 5.8569 21.235 <2e-16 ***
area 4.9643 0.1004 49.430 <2e-16 ***
yearco 2703.4555 147.4826 18.331 <2e-16 ***
yearco2 1403.5794 129.5723 10.832 <2e-16 ***
yearco3 -73.4991 132.1618 -0.556 0.578
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.855 on 3077 degrees of freedom
Multiple R-squared: 0.4947, Adjusted R-squared: 0.4941
F-statistic: 753.2 on 4 and 3077 DF, p-value: < 2.2e-16
And we compare the covariance matrix of both model:
vcov(munich_regression)
(Intercept) area yearco yearco2 yearco3
(Intercept) 67.6043077 -0.90063685 -273.475196 190.606709 -11.1223265
area -0.9006368 0.01336757 4.059016 -2.829052 0.1650815
yearco -273.4751960 4.05901572 22572.641836 -859.031732 50.1264169
yearco2 190.6067095 -2.82905229 -859.031732 21938.863890 -34.9371040
yearco3 -11.1223265 0.16508151 50.126417 -34.937104 21342.1747138
vcov(weighted_linear_model)
(Intercept) area yearco yearco2 yearco3
(Intercept) 34.3033928 -0.54385319 -238.469903 175.705533 70.5099712
area -0.5438532 0.01008615 2.273448 -1.993453 -0.4420123
yearco -238.4699026 2.27344774 21751.123356 -4179.319393 2680.1687402
yearco2 175.7055331 -1.99345269 -4179.319393 16788.973668 -1332.6752253
yearco3 70.5099712 -0.44201233 2680.168740 -1332.675225 17466.7502213
And the confidence intervals:
linear_confint <- confint(munich_regression)
linear_confint
2.5 % 97.5 %
(Intercept) 97.253848 129.496900
area 4.909675 5.363069
yearco 2721.938054 3311.107407
yearco2 1457.449437 2038.288772
yearco3 -297.458491 275.427613
weighted_confint <- confint(weighted_linear_model)
weighted_confint
2.5 % 97.5 %
(Intercept) 112.889518 135.857217
area 4.767364 5.161197
yearco 2414.281147 2992.629893
yearco2 1149.522483 1657.636326
yearco3 -332.633438 185.635293
We use R to obtain the covariance matrix and the confidence intervals:
library(sandwich)
robust_se <- sqrt(diag(vcovHC(munich_regression, type = "HC1")))
coefs <- munich_regression$coefficients
z <- qnorm(1 - 0.05 / 2)
lower <- coefs - z * robust_se
upper <- coefs + z * robust_se
confint_robust <- data.frame(lower, upper)
colnames(confint_robust) <- colnames(linear_confint)
confint_robust