# Exercises for Chapter8
# 20200528
# Exercise 1
T8E.1 <- data.frame(Age = c(15:29, 6:10*5+2, 60),
MPA = c(15:29+0.5, 6:10*5+2.5, 60),
MRG = c(0, 2685, 7852, 21660, 31146, 35903, 35139, 31222,
26139, 20649, 16395, 13276, 11143, 9248, 8121,
30122, 17174, 10837, 7637, 5205, 10420))
T8E.2 <- data.frame(Age = c(3:11*5+2.5),
SINGLE = c(1877926, 945341, 312682, 157891, 92276,
74654, 76095, 90421, 104554),
MARRIED = c(87767, 781760, 1220771, 1531978, 1318860,
1186132, 1129956, 1145657, 1123748),
WIDDIV = c(495, 32420, 93643, 131671, 126890,
126292, 132019, 78320, 245741))
# (a) mean age of marriage of females in England and Wales in 1981
sum(T8E.1$MPA*T8E.1$MRG)/sum(T8E.1$MRG)
# (b) median age of marriage of females in England and Wales in 1981
T8E.1$PCSMRG <- cumsum(T8E.1$MRG)/sum(T8E.1$MRG)
T8E.1$Age[which.min(ifelse(T8E.1$PCSMRG<0.5, 1, T8E.1$PCSMRG))] # median
IndivAges <- c(rep(T8E.1$Age, T8E.1$MRG))
median(IndivAges) # alternative method to seek median
# When considering that age 18 means [18, 19),
# 23 means some age in [23, 24).
# truemedian() in fmsb packages estimates such interpolated median
# Note: truemedian() assumes that 23 means [22.5, 23.5), thus 0.5
# must be added to the result.
library(fmsb)
truemedian(IndivAges)+0.5 # True median
# (c) SMAM (Singulate Mean Age at Marriage) for females from 1981 census
# Total years of singleness living before age 50
T8E.2$PSG <- T8E.2$SINGLE/(T8E.2$SINGLE+T8E.2$MARRIED+T8E.2$WIDDIV)*100
TYSLBA50 <- 15*100 + sum(T8E.2$PSG[T8E.2$Age < 50])*5
# Percentage still single at exact age 50
PSA50 <- mean(T8E.2$PSG[(T8E.2$Age > 45) & (T8E.2$Age < 55)])
SMAM <- (TYSLBA50 - PSA50*50) / (100 - PSA50)
print(SMAM)
# The mean was larger than median and SMAM (those were almost same)
# The answer to the question "Would the SMAM and the mean age at
# marriage be the same if the mean was calculated using only first
# marriages?": No. The differences are essentially caused by
# the fact that mean and median ages of marriage were calculated
# only using age at marriage in 1981, but the SMAM used the marriage
# information which occurred several decades ago. The distribution
# of ages at marriage in 1981 was skewed, so that median gives better
# estimate.
#
# Exercise 2 (Calculate net nupitiality table)
T8E.3 <- data.frame(Age = 16:29,
gx = c(59, 360, 1644, 3457, 6073, 9078, 11081, 12716, 13191,
13395, 12396, 11914, 11964, 11414)/100000,
qx = c(52, 110, 115, 106, 105, 96, 89, 90, 102, 114, 107, 127,
160, 162)/100000)
T8E.3$NevMar <- c(0.979, rep(0, 13))
T8E.3$EvMar <- rep(0, 14)
T8E.3$DieSgl <- 1 - T8E.3$NevMar
for (i in 2:14) {
T8E.3$DieSgl[i] <- T8E.3$NevMar[i-1]*T8E.3$qx[i-1]
T8E.3$EvMar[i] <- T8E.3$NevMar[i-1]*T8E.3$gx[i-1]
T8E.3$NevMar[i] <- T8E.3$NevMar[i-1] - (T8E.3$DieSgl[i] + T8E.3$EvMar[i])
}
print(T8E.3)
# Exercise 3
T8E.4 <- data.frame(AG = 2:10*5,
S1935 = c(1000, 988, 783, 486, 337, 270, 240, 231, 221),
S1940 = c(1000, 981, 716, 394, 272, 248, 231, 222, 222),
S1945 = c(1000, 970, 636, 304, 204, 190, 204, 210, 210))
# (a) Trend in nupitiality
print(T8E.4)
# The later the data are, the earlier marriges are.
#
# (b) SMAM for each year. Which of crude mean and SMAM is higher in 1945?
CalcSMAM <- function(DATA) {
.DATA <- DATA/10 # change to percent
.TYSLBA50 <- 15*100 + sum(.DATA[2:8])*5
.PSA50 <- mean(.DATA[8:9])
.SMAM <- (.TYSLBA50 - .PSA50*50) / (100 - .PSA50)
return(.SMAM)
}
CalcSMAM(T8E.4$S1935)
CalcSMAM(T8E.4$S1940)
CalcSMAM(T8E.4$S1945)
BM1945 <- PS1945 <- T8E.4$S1945
BM1945[1] <- 0
for (i in 2:9) { BM1945[i] <- (PS1945[i-1] - PS1945[i]) / PS1945[i-1] }
sum(BM1945*T8E.4$AG)/sum(BM1945) # mean
# Here, mean age (23.8) was slightly higher than SMAM (22.9) in 1945.
# Generally considering, SMAM tends to change slower than mean age of
# marriage, because SMAM reflects the nupitiality over previous several
# decades. Since the SMAM is age-standardized but mean age is not,
# this relationship is broken when the age structure has strange shape.
#
# (c) How to produce nupitiality indices for 1935-40 and 1940-5?
# Consider the synthetic cohort and compare.
CM3540 <- c(0, (T8E.4$S1935[-9] - T8E.4$S1940[-1]))
PUM1940 <- T8E.4$S1940*(1-CM3540/1000)
CalcSMAM(PUM1940)
CM4045 <- c(0, (T8E.4$S1940[-9] - T8E.4$S1945[-1]))
PUM1945 <- T8E.4$S1945*(1-CM4045/1000)
CalcSMAM(PUM1945)
# Using synthetic cohort, SMAM becomes earlier than periodical data.