# R codes for 22 April 2021.
# given by Minato Nakazawa
# * Installation of the "fmsb" and "pyramid" packages.
# Only once for installed R system, the following 2 lines should be typed.
# Whipple's Index is based on Newell C (1988)
# Caretaker ratio is based on Rowland DT (2003)
#
# WhipplesIndex(), CaretakerRatio() and IndexOfDissimilarity() are defined in fmsb.
# Load the fmsb and pyramid packages.
# It's necessary every time to use.
if (require(fmsb)==FALSE) { install.packages("fmsb"); library(fmsb) }
if (require(pyramid)==FALSE) { install.packages("pyramid"); library(pyramid) }
# See the explanation for the dataset Jpopl, included in the fmsb package.
?Jpopl
# Jpopl includes the Japanese census population by sex and each age from year 1888 to 2015.
# The names of variables are the letter "M" or "F" and 4 digits meaning year like
# "M1888" or "F2015". In R, small letter and capital letter are distinguished (case sensitive).
# Reference to the elements in a variable can be done using [ ].
# Examples are shown below.
Jpopl[1, "M2015"] # The first element of the variable M2015. Thus, Age 0 population of 2015 males.
Jpopl$M2015[1] # Exactly same meaning with the previous line.
Jpopl$M2015[Jpopl$Age==0] # Also the same meaning. The variable Age ranges 0 to 110.
# Using several elements of a variable is necessary to calculate Whipple's index.
# Writing the raws to use within "c()", delimited with comma.
# The calculation result can be named by using "<-".
# sum() is the function to return the sum of the elements within a parentheses.
numerator <- sum(Jpopl[c(26, 31, 36, 41, 46, 51, 56, 61), "M2015"]) # the numerator of the Whipple's index
numerator <- sum(Jpopl$M2015[0:7*5+26]) # 0:7*5 is exactly same as c(0, 5, 10, 15, 20, 25, 30, 35).
denominator <- sum(Jpopl[24:63, "M2015"]) # The denominater of the Whipple's index. 24:63 specifies the 23-62 years old.
numerator/denominator*100*5 # This gives the Whipple's index for Japanese males in 2015. The value is 100.7391.
WhipplesIndex(Jpopl$M2015)
sum(Jpopl[0:7*5+26, "F2015"])/sum(Jpopl[24:63, "F2015"])*100*5 # The Whipple's index for Japanese females in 2015 is 100.7669.
WhipplesIndex(Jpopl$F2015)
# According to the judgement criteria, the census in 2015 Japan, heaping is almost not observed.
# In 1888, the values are similar.
WhipplesIndex(Jpop$M1888)
WhipplesIndex(Jpop$F1888)
#
# India census 2011 (Source: https://censusindia.gov.in/2011census/C-series/C-13.html)
India2011 <- read.delim("https://minato.sip21c.org/ldaR/India2011census.txt")
WhipplesIndex(India2011$Males)
WhipplesIndex(India2011$Females)
#
# Drawing population pyramid
# Just draw
pyramids(Left=Jpopl$M2015, Right=Jpopl$F2015, Center=Jpopl$Age) # 2015 Japan
# Very ugly. Limit the printing of ages at center line to one tenth.
pyramids(Left=Jpopl$M2015, Right=Jpopl$F2015, Center=Jpopl$Age, Cstep=10)
# Next, manually set the numeric axis.
# At first, seek the maximum population among all age/sex groups.
max(c(Jpopl$M2015, Jpopl$F2015))
# Slightly more than a million.
# Thus, 0 to 1.2 million by every 200,000 may be put at numeric axis.
# Too many zeros are still ugly, so that divide original values by 1,000
# and show the unit as 1,000. The values shown in numeric axis will be
# 0, 200, 400, 600, 800, 1000, 1200 as follows.
pyramids(Left=Jpopl$M2015/1000, Laxis=0:6*200, Right=Jpopl$F2015/1000,
Center=Jpopl$Age, Cstep=10, Llab="Males( x1000)", Rlab="Females( x1000)",
Clab="", main="Population pyramid for Japan in 2015")
# Such long command line will be given as one line without carriage return,
# otherwise you can type carriage return (new line) after the comma.
# Here, if you type
windows() # in the case of Microsoft Windows. type x11() for MacOS X or Linux, instead.
# Leaving the population pyramid for 2015 Japan as is, and the new graphic
# window will newly open.
# Then, make the population pyramid for 1950 Japan in the new window.
pyramids(Left=Jpopl$M1950/1000, Laxis=0:6*200, Right=Jpopl$F1950/1000,
Center=Jpopl$Age, Cstep=10, Llab="Males (x1000)", Rlab="Females (x1000)",
Clab="", main="Population pyramid for Japan in 1950")
# Comparing the two pyramid will be possible by adjusting the size and position
# of those two windows. Of course, copying the graph and paste to Power Point or
# LibreOffice Impress is possible from the menu of graphic windows.
# Drawing population pyramids of India 2011
pyramids(Left=India2011$Males, Right=India2011$Females, Center=India2011$Age,
Cstep=10, Clab="", AxisFM="d", main="Population pyramid of India in 2011")
# =========================================
# Calculating sex ratios
# At first, loading fmsb
if (require(fmsb)==FALSE) { install.packages("fmsb"); library(fmsb) }
# Calcullate sex ratios at birth (the Jpopl data is not at birth,
# age 0 populations are used as suurogate measure.
Jpopl$M2015[1]/Jpopl$F2015[1]*100 # Japan in 2015
Jpopl$M1950[1]/Jpopl$F1950[1]*100 # Japan in 1950
Jpopl$M1888[1]/Jpopl$F1888[1]*100 # Japan 1n 1888
# All sex ratios at age 0 are approximately 1.05 in Japan.
# However, the ratio is slightly increasing.
# Probably immaturely born boys are saved by naonatal care development.
# In R, calculation is done for vectors.
# So it's easy to calculate sex ratios for all ages.
Jpopl$M2015/Jpopl$F2015*100 # Japan in 2015
Jpopl$M1950/Jpopl$F1950*100 # Japan in 1950
Jpopl$M1888/Jpopl$F1888*100 # Japan in 1888.
# In 1888 data, an extraordinary high value (more than 120) was observed
# at age 42. They were born in 1846's Hinoe-Uma year. Due to Japanese
# superstition, girls born in Hinoe-Uma year are believed to kill and eat men,
# so that there were some intentional misreporting or infanticides.
# In 1966, Hinoe-Uma did not affect the sex ratio, but too low birth rates,
# since the parents wanted to avoid the tragedy of Hinoe-Uma girls.
# The differences between 1846 and 1966 are, (1) availability of effective
# contraceptive devices, (2) rigidity of birth registration, in other words,
# accuracy of registered birth year, and (3) social allowance of infanticide
# (including neglect), probably.
#
# Dependency ratio index written in the text.
(10251300+9098700)/30571500*100
# Calculate dependency ratio
# The dependency ratio largely depends on the definition of working population.
# In Japan, MHLW defines children population (Nensho-Jinko) for ages from 0 to 14,
# and elderly population for ages 60 and more, or 65 and more (varied).
# Actually, nowadays most Japanese people go to high school, so that the definition
# of children population may be invalid. However, this definition conventionally
# used in MHLW statistics.
P2015 <- Jpopl$M2015+Jpopl$F2015 # Calculate the total population in 2015 for each age.
P2015 # See the result
Children2015 <- sum(P2015[1:15]) # Calculate the children population in 2015
Elderly2015 <- sum(P2015[66:111]) # Calculate the elderly population in 2015, when the elderlies are age 65 and more.
Working2015 <- sum(P2015[16:65]) # Calculate the working population in 2015, when the elderlies are age 65 and more.
(Children2015+Elderly2015)/Working2015*100 # The dependency ratio, when the elderlies are age 65 and more.
Elderly2015 <- sum(P2015[61:111]) # Calculate the elderly population in 2015, when the elderlies are age 60 and more.
Working2015 <- sum(P2015[16:60]) # Calculate the working population in 2015, when the elderlies are age 60 and more.
(Children2015+Elderly2015)/Working2015*100 # The dependency ratio, when the elderlies are age 60 and more.
# Note: the two dependency ratios are quite different due to the difference of definition of "elderly" age.
Working2015/sum(P2015[1:111])*100 # The proportion of working population in total.
Elderly2015/Children2015*100 # Aging index (in Japanese, it is called as Rounenka-Shisuu).
sum(Jpopl$M1888[61:111]+Jpopl$F1888[61:111], na.rm=TRUE)/sum(Jpopl$M1888[1:15]+Jpopl$F1888[1:15], na.rm=TRUE)*100
CaretakerRatio(PM=Jpop$M2015, PF=Jpop$F2015)
# Based on the age-specific population, Dr. Toshio Kuroda (1978) suggested the expanding index
# (Fukurami-shisuu in Japanese). During the rapid development stage in Japan, the movement
# of youth or newly married family frequently occurred, especially from rural to urban.
# Dr. Kuroda assumed the population aged 15-34 are easily moving compared with
# the population aged 5-14 and 35-44. The definition of the expanding index is
# (Population aged 15-34) / [(Population aged 5-14) + (Population aged 35-44)]
# It is not useful for Japanese total population, because the international migration
# is not common in Japan. The index is useful for compare the regional - prefecture or
# municiparity level - population.
IndexOfDissimilarity(Jpopl$M1888+Jpopl$F1888, Jpopl$M1940+Jpopl$F1940) # 0.05204627
IndexOfDissimilarity(Jpopl$M1950+Jpopl$F1950, Jpopl$M2000+Jpopl$F2000) # 0.2815369
# Answer to the exercise of chapter 3.
# Center labels should be treated as characters, not factors.
options(stringsAsFactors=FALSE)
# Defining data
Dominica1981 <- data.frame(
AgeGroup=c("0", "1-4", paste(1:15*5, "-", 1:15*5+4, sep=""), "80+", "not stated", "Total"),
Males=c(756, 3446, 5277, 5595, 4779, 3722, 2521, 1764, 1404, 1133, 1058, 1051, 950, 959, 850, 611, 383, 340, 155, 36754),
Females=c(727, 3267, 4850, 5488, 4611, 3286, 2190, 1755, 1416, 1328, 1292, 1311, 1097, 1190, 962, 872, 584, 689, 126, 37041))
Rhodesia1969 <- data.frame(
YOB=1969:1965,
Males=c(34460, 104020, 90790, 82830, 83220),
Females=c(36750, 111510, 94870, 87770, 83970),
Total=c(71210, 215530, 185660, 170600, 167190))
# Exercise 3-1
library(pyramid)
Ages <- c("0-4", Dominica1981$AgeGroup[3:18])
Males <- c(sum(Dominica1981$Males[1:2]), Dominica1981$Males[3:18])
Females <- c(sum(Dominica1981$Females[1:2]), Dominica1981$Females[3:18])
pyramids(Left=Males, Right=Females, Center=Ages, Laxis=0:3*2000, Cadj=-0.01)
# Exercise 3-2
Rhodesia1969$Ages <- 1969-Rhodesia1969$YOB
pyramids(Left=Rhodesia1969$Males, Right=Rhodesia1969$Females, Center=Rhodesia1969$Ages, Laxis=0:3*50000, AxisFM="d", Cadj=0.05)