We import the data from the official page of the dataset.
# Import the Munich rent index data from the dataset's download page.
munich_rent_url <- "https://www.uni-goettingen.de/de/document/download/64c29c1b1fccb142cfa8f29a942a9e05.raw/rent99.raw"
munich_rent_index <- read.table(
  url(munich_rent_url),
  # `header` is a logical flag: pass TRUE explicitly instead of relying on
  # the implicit coercion of 1.
  header = TRUE,
  # One class per column, in file order: four numeric columns followed by
  # five columns read as factors.
  colClasses = c(
    "numeric", "numeric", "numeric",
    "numeric", "factor", "factor",
    "factor", "factor", "factor"
  )
)
# Convert the indicator columns from factor to logical: TRUE where the
# factor level is 1, FALSE otherwise.
munich_rent_index$bath <- munich_rent_index$bath == 1
munich_rent_index$kitchen <- munich_rent_index$kitchen == 1
munich_rent_index$cheating <- munich_rent_index$cheating == 1
# Print a per-column overview of the imported data.
summary(munich_rent_index)
rent rentsqm area yearc location
Min. : 40.51 Min. : 0.4158 Min. : 20.00 Min. :1918 1:1794
1st Qu.: 322.03 1st Qu.: 5.2610 1st Qu.: 51.00 1st Qu.:1939 2:1210
Median : 426.97 Median : 6.9802 Median : 65.00 Median :1959 3: 78
Mean : 459.44 Mean : 7.1113 Mean : 67.37 Mean :1956
3rd Qu.: 559.36 3rd Qu.: 8.8408 3rd Qu.: 81.00 3rd Qu.:1972
Max. :1843.38 Max. :17.7216 Max. :160.00 Max. :1997
bath kitchen cheating district
Mode :logical Mode :logical Mode :logical 411 : 53
FALSE:2891 FALSE:2951 FALSE:321 623 : 53
TRUE :191 TRUE :131 TRUE :2761 350 : 49
563 : 49
711 : 42
360 : 38
(Other):2798
For future use, this code is going to be available in an R script in
import_data/munich_rent_index.R.
# Import the Zambia malnutrition data from the dataset's download page.
zambia_url <- "https://www.uni-goettingen.de/de/document/download/d90a2d7b26c4504ab6630cf36cbae2fa.raw/zambia_height92.raw"
malnutrition_zambia <- read.table(
  url(zambia_url),
  # `header` is a logical flag: pass TRUE explicitly instead of relying on
  # the implicit coercion of 1.
  header = TRUE,
  # One class per column, in file order (twelve columns in total).
  colClasses = c(
    "numeric", "factor", "numeric",
    "integer", "numeric", "numeric",
    "numeric", "factor", "factor",
    "factor", "factor", "factor"
  )
)
# Convert the m_work indicator from factor to logical: TRUE where the factor
# level is 1, FALSE otherwise.
malnutrition_zambia$m_work <- malnutrition_zambia$m_work == 1
# Print a per-column overview of the imported data.
summary(malnutrition_zambia)
zscore c_gender c_breastf c_age m_agebirth
Min. :-600.0 0:2254 Min. : 0.00 Min. : 0.00 Min. :13.17
1st Qu.:-257.0 1:2167 1st Qu.: 1.00 1st Qu.:12.00 1st Qu.:21.08
Median :-171.0 Median :14.00 Median :26.00 Median :25.33
Mean :-171.2 Mean :11.12 Mean :27.12 Mean :26.40
3rd Qu.: -86.0 3rd Qu.:19.00 3rd Qu.:42.00 3rd Qu.:31.08
Max. : 503.0 Max. :46.00 Max. :59.00 Max. :48.67
m_height m_bmi m_education m_work district
Min. :134.0 Min. :13.15 1: 822 Mode :logical 98 : 488
1st Qu.:154.0 1st Qu.:19.75 2:2756 FALSE:1978 61 : 270
Median :158.1 Median :21.46 3: 767 TRUE :2443 53 : 235
Mean :158.1 Mean :22.00 4: 76 97 : 156
3rd Qu.:162.0 3rd Qu.:23.57 94 : 153
Max. :185.0 Max. :39.29 66 : 128
(Other):2991
region time
2 :967 1:4421
8 :659
5 :609
6 :430
3 :410
4 :394
(Other):952
The import code is available in import_data/malnutrition_zambia.R.
# Import the patent opposition data from the dataset's download page.
patent_url <- "https://www.uni-goettingen.de/de/document/download/66eb4eb0bc0e8f6acf1d02ddf683f077.raw/patentdata.raw"
patent_opposition <- read.table(
  url(patent_url),
  # `header` is a logical flag: pass TRUE explicitly instead of relying on
  # the implicit coercion of 1.
  header = TRUE,
  # One class per column, in file order: five columns read as factors
  # followed by four integer columns.
  colClasses = c(
    "factor", "factor", "factor",
    "factor", "factor", "integer",
    "integer", "integer", "integer"
  )
)
# Convert the indicator columns from factor to logical: TRUE where the
# factor level is 1, FALSE otherwise. `opp` is deliberately left as a factor,
# as in the original import.
patent_opposition$biopharm <- patent_opposition$biopharm == 1
patent_opposition$ustwin <- patent_opposition$ustwin == 1
patent_opposition$patus <- patent_opposition$patus == 1
patent_opposition$patgsgr <- patent_opposition$patgsgr == 1
# Print a per-column overview of the imported data.
summary(patent_opposition)
opp biopharm ustwin patus patgsgr
0:2847 Mode :logical Mode :logical Mode :logical Mode :logical
1:2019 FALSE:2710 FALSE:1905 FALSE:3224 FALSE:3723
TRUE :2156 TRUE :2961 TRUE :1642 TRUE :1143
year ncit ncountry nclaims
Min. :1980 Min. : 0.00 Min. : 1.000 Min. : 1.00
1st Qu.:1989 1st Qu.: 0.00 1st Qu.: 4.000 1st Qu.: 7.00
Median :1992 Median : 1.00 Median : 7.000 Median : 10.00
Mean :1991 Mean : 1.64 Mean : 7.796 Mean : 13.13
3rd Qu.:1994 3rd Qu.: 2.00 3rd Qu.:11.000 3rd Qu.: 16.00
Max. :1997 Max. :40.00 Max. :17.000 Max. :355.00
The import code is available in import_data/patent_opposition.R.
# Import the forest health status data from the dataset's download page.
forest_url <- "https://www.uni-goettingen.de/de/document/download/f5ef58e05aff8b6546dcf993aa73a480.raw/beach.raw"
forest_health_status <- read.table(
  url(forest_url),
  # `header` is a logical flag: pass TRUE explicitly instead of relying on
  # the implicit coercion of 1.
  header = TRUE,
  # One class per column, in file order (sixteen columns in total).
  colClasses = c(
    "factor", "numeric", "factor",
    "numeric", "numeric", "numeric",
    "numeric", "numeric", "numeric",
    "numeric", "numeric", "factor",
    "factor", "factor", "factor",
    "factor"
  ),
  # Fields containing only "." are read as NA.
  na.strings = "."
)
# Print a per-column overview of the imported data.
summary(forest_health_status)
id year defol x y
1 : 22 Min. :1983 0 :1116 Min. : 0.70 Min. :0.400
10 : 22 1st Qu.:1988 12.5: 435 1st Qu.: 3.70 1st Qu.:2.100
11 : 22 Median :1994 25 : 126 Median : 8.10 Median :3.000
12 : 22 Mean :1994 37.5: 68 Mean : 7.46 Mean :3.306
13 : 22 3rd Qu.:1999 50 : 29 3rd Qu.:10.50 3rd Qu.:4.100
14 : 22 Max. :2004 62.5: 16 Max. :16.10 Max. :9.000
(Other):1664 75 : 6
age canopyd gradient alt depth
Min. : 7.0 Min. : 0.0 Min. : 0.00 Min. :250 Min. : 9.00
1st Qu.: 65.0 1st Qu.: 70.0 1st Qu.: 6.00 1st Qu.:340 1st Qu.:16.00
Median :112.0 Median : 90.0 Median :14.00 Median :390 Median :23.00
Mean :106.1 Mean : 77.3 Mean :15.45 Mean :387 Mean :24.64
3rd Qu.:148.0 3rd Qu.:100.0 3rd Qu.:21.00 3rd Qu.:440 3rd Qu.:31.00
Max. :234.0 Max. :100.0 Max. :46.00 Max. :480 Max. :51.00
ph watermoisture alkali humus type fert
Min. :3.280 1:198 1:352 1 :512 0:902 0:1453
1st Qu.:4.100 2:990 2:991 0 :463 1:894 1: 343
Median :4.250 3:608 3:308 2 :388
Mean :4.295 4:145 3 :266
3rd Qu.:4.440 4 :118
Max. :6.050 5 : 37
NA's :3 (Other): 12
The import code is available in import_data/forest_health_status.R.
# 2 x 2 grid of histograms for the continuous Munich rent variables, drawn on
# the density scale (freq = FALSE) so that the kernel density estimate can be
# overlaid in red on the same axis.
par(mfrow = c(2, 2))
ylab <- "estimated density"
hist(
  munich_rent_index$rent,
  freq = FALSE, xlab = "net rent in Euro", ylab = ylab
)
lines(density(munich_rent_index$rent), col = "red", lwd = 2)
hist(
  munich_rent_index$rentsqm,
  freq = FALSE, xlab = "net rent per sqm in Euro", ylab = ylab
)
lines(density(munich_rent_index$rentsqm), col = "red", lwd = 2)
# Axis label corrected: it previously read "area ub sqm".
hist(
  munich_rent_index$area,
  freq = FALSE, xlab = "area in sqm", ylab = ylab
)
lines(density(munich_rent_index$area), col = "red", lwd = 2)
hist(
  munich_rent_index$yearc,
  freq = FALSE, xlab = "year of construction", ylab = ylab
)
lines(density(munich_rent_index$yearc), col = "red", lwd = 2)
# 3 x 2 grid of histograms for the continuous Zambia malnutrition variables,
# drawn on the density scale (freq = FALSE) so that the kernel density
# estimate can be overlaid in red on the same axis.
par(mfrow = c(3, 2))
ylab <- "estimated density"
hist(
  malnutrition_zambia$zscore,
  freq = FALSE, xlab = "child's Z-score", ylab = ylab
)
lines(density(malnutrition_zambia$zscore), col = "red", lwd = 2)
# Axis label corrected to the plural "months" and to the spelling
# "breastfeeding" used by the other breastfeeding plot in this analysis.
hist(
  malnutrition_zambia$c_breastf,
  freq = FALSE, xlab = "duration of breastfeeding in months", ylab = ylab
)
lines(density(malnutrition_zambia$c_breastf), col = "red", lwd = 2)
hist(
  malnutrition_zambia$c_age,
  freq = FALSE, xlab = "child's age in months", ylab = ylab
)
lines(density(malnutrition_zambia$c_age), col = "red", lwd = 2)
hist(
  malnutrition_zambia$m_agebirth,
  freq = FALSE, xlab = "mother's age at birth", ylab = ylab
)
lines(density(malnutrition_zambia$m_agebirth), col = "red", lwd = 2)
hist(
  malnutrition_zambia$m_height,
  freq = FALSE, xlab = "mother's height in cm", ylab = ylab
)
lines(density(malnutrition_zambia$m_height), col = "red", lwd = 2)
hist(
  malnutrition_zambia$m_bmi,
  freq = FALSE, xlab = "mother's BMI", ylab = ylab
)
lines(density(malnutrition_zambia$m_bmi), col = "red", lwd = 2)
# 2 x 2 grid of scatter plots: net rent and net rent per sqm against living
# area (top row) and against year of construction (bottom row).
par(mfrow = c(2, 2))
with(munich_rent_index, {
  plot(area, rent, xlab = "area in sqm", ylab = "net rent in Euro")
  plot(area, rentsqm, xlab = "area in sqm", ylab = "net rent per sqm in Euro")
  plot(yearc, rent, xlab = "year of construction", ylab = "net rent in Euro")
  plot(
    yearc, rentsqm,
    xlab = "year of construction", ylab = "net rent per sqm in Euro"
  )
})
# 2 x 2 grid of box plots: net rent and net rent per sqm grouped by each
# distinct value of area (top row) and of year of construction (bottom row).
# The x-axis is suppressed (xaxt = "n") in every panel.
par(mfrow = c(2, 2))
invisible(Map(
  function(panel_formula, x_label, y_label) {
    boxplot(
      panel_formula,
      data = munich_rent_index,
      xlab = x_label,
      ylab = y_label,
      xaxt = "n"
    )
  },
  # Panels are listed in drawing order (filled row by row).
  list(rent ~ area, rentsqm ~ area, rent ~ yearc, rentsqm ~ yearc),
  c(
    "area in sqm", "area in sqm",
    "year of construction", "year of construction"
  ),
  c(
    "net rent in Euro", "net rent per sqm in Euro",
    "net rent in Euro", "net rent per sqm in Euro"
  )
))
# Net rent per sqm by location category, shown side by side as box plots
# (left) and as one kernel density estimate per category (right).
par(mfrow = c(1, 2))
boxplot(
  rentsqm ~ location,
  data = munich_rent_index,
  xlab = "",
  # Axis label corrected: it previously read "net rennt per sqm".
  ylab = "net rent per sqm",
  # Display names for the location levels 1, 2 and 3.
  names = c("average", "good", "top")
)
# The first density sets up the axes; the other two are added on top. The
# colours match the legend below: black = location 1, green = 2, red = 3.
plot(
  density(subset(munich_rent_index, location == 1)$rentsqm),
  xlab = "net rent per sqm", ylab = "estimated density", lwd = 2
)
lines(
  density(subset(munich_rent_index, location == 2)$rentsqm),
  col = "green", lwd = 2
)
lines(
  density(subset(munich_rent_index, location == 3)$rentsqm),
  col = "red", lwd = 2
)
legend(
  "topright",
  legend = c("average", "good", "top"),
  col = c("black", "green", "red"),
  lwd = 2
)
# Child's Z-score against the child's age, shown side by side as a scatter
# plot (left) and as one box plot per distinct age value (right).
# Axis labels corrected to the plural "months", matching the label used for
# the same variable in the histogram of c_age.
par(mfrow = c(1, 2))
plot(
  x = malnutrition_zambia$c_age,
  y = malnutrition_zambia$zscore,
  xlab = "child's age in months",
  ylab = "Z-score"
)
boxplot(
  zscore ~ c_age,
  data = malnutrition_zambia,
  xlab = "child's age in months",
  ylab = "Z-score",
  # Suppress the x-axis: there is one box per distinct age value.
  xaxt = "n"
)
# Five box-plot panels on a 3 x 2 grid: child's Z-score grouped by
# breastfeeding duration, by binned maternal BMI, age at birth and height,
# and by the mother's level of education.
par(mfrow = c(3, 2))

# Split a numeric vector into 100 equal-width intervals spanning its observed
# range; the minimum is included in the first interval.
equal_width_bins <- function(values) {
  edges <- seq(min(values), max(values), length.out = 101)
  cut(values, breaks = edges, include.lowest = TRUE)
}

# Binned versions of the continuous maternal covariates, stored as new
# columns so they can be used as grouping factors below.
malnutrition_zambia$bmi_group <- equal_width_bins(malnutrition_zambia$m_bmi)
malnutrition_zambia$age_group <- equal_width_bins(
  malnutrition_zambia$m_agebirth
)
malnutrition_zambia$height_group <- equal_width_bins(
  malnutrition_zambia$m_height
)

# The first four panels share the same y-axis label and suppress the x-axis.
invisible(Map(
  function(panel_formula, x_label) {
    boxplot(
      panel_formula,
      data = malnutrition_zambia,
      xlab = x_label,
      ylab = "average Z-score",
      xaxt = "n"
    )
  },
  list(
    zscore ~ c_breastf,
    zscore ~ bmi_group,
    zscore ~ age_group,
    zscore ~ height_group
  ),
  c(
    "duration of breastfeeding in months",
    "mother's BMI",
    "mother's age in years",
    "mother's height in cm"
  )
))

# Final panel: education levels get readable names instead of their codes.
boxplot(
  zscore ~ m_education,
  data = malnutrition_zambia,
  xlab = "mother's level of education",
  ylab = "Z-score",
  names = c("none", "primary", "secondary", "higher")
)