2-1. Three Popular Data Displays
1. Stem and Leaf Diagrams
EXAMPLE 1. 통계학 강의를 듣고 있는 30명 학생의 시험 성적은 다음과 같다.
86 80 25 77 73 76 100 90 69 93
90 83 70 73 73 70 90 83 71 95
40 58 68 69 100 78 87 97 92 73์์ ๋ฐ์ดํฐ๋ฅผ ๊ฐ์ํํ๊ธฐ ์ํ ๋ฐฉ๋ฒ ์ค ํ๋๊ฐ stem and leaf diagram์ด๋ค.
R์์๋ ์ด๋ฌํ ๋ฐ์ดํฐ ํํ์ ์ํด stem() ํจ์๋ฅผ ์ฌ์ฉํ๋ค.
Syntax :
stem(x, scale = 1, width = 80, atom = 1e-08)
arguments :
x : 수치형 벡터
scale = : 플롯의 길이를 제어
width = : 원하는 플롯의 넓이
atom = : tolerance
[Solution]
# The 30 scores listed in EXAMPLE 1.
score <- c(
  86, 80, 25, 77, 73, 76, 100, 90, 69, 93,
  90, 83, 70, 73, 73, 70, 90, 83, 71, 95,
  40, 58, 68, 69, 100, 78, 87, 97, 92, 73
)
# Print the stem-and-leaf display using stem()'s default settings.
stem(score)
## The decimal point is 1 digit(s) to the right of the |
##
## 2 | 5
## 3 |
## 4 | 0
## 5 | 8
## 6 | 899
## 7 | 0013333678
## 8 | 03367
## 9 | 0002357
## 10 | 00
##10์ ์๋ฆฌ ์ซ์๊ฐ stem์ด ๋๊ณ , 1์ ์๋ฆฌ ์ซ์๊ฐ leaf๊ฐ ๋จ์ ์ ์ ์๋ค.
EXAMPLE 2. stem์ ๊ฐฏ์๋ฅผ ๋ฐ์ผ๋ก ์ค์ฌ์ diagram์ ๊ทธ๋ ค๋ผ.
[Solution]
# The same 30 scores as in EXAMPLE 1.
score <- c(
  86, 80, 25, 77, 73, 76, 100, 90, 69, 93,
  90, 83, 70, 73, 73, 70, 90, 83, 71, 95,
  40, 58, 68, 69, 100, 78, 87, 97, 92, 73
)
# scale = 0.5 cuts the number of stems to 50%, leaving stems 2, 4, 6, 8, 10.
stem(score, scale = 0.5)
## The decimal point is 1 digit(s) to the right of the |
##
## 2 | 5
## 4 | 08
## 6 | 8990013333678
## 8 | 033670002357
## 10 | 00
##2. Frequency Histograms
stem and leaf diagram์ ๋๊ท๋ชจ ๋ฐ์ดํฐ ์ธํธ์๋ ์ ํฉํ์ง ์๋ค.
์ด ๋ ์ฌ์ฉ๋๋ ๋ฐฉ๋ฒ์ด ๋์ ๋ถํฌ์ด๋ค.
hist(x, main = paste("Histogram of ", xname),
xlim = range(breaks),
ylim = NULL,
xlab = xname,
ylab,
... )
arguments :
x: 히스토그램의 벡터 데이터
main =: 히스토그램의 제목
xlim =: x 축의 범위
ylim =: y 축의 범위
xlab =: x 축의 제목
ylab =: y 축의 제목
EXAMPLE 3. ์ด์ ์ stem and leaf diagram์ frequency histogram์ผ๋ก ๊ทธ๋ ค๋ผ.
# The same 30 scores used for the stem-and-leaf diagrams.
score <- c(86, 80, 25, 77, 73, 76, 100, 90, 69, 93,
           90, 83, 70, 73, 73, 70, 90, 83, 71, 95,
           40, 58, 68, 69, 100, 78, 87, 97, 92, 73)
# Frequency histogram of the scores with explicit axis limits.
# The dangling trailing comma of the original call is removed so that no
# empty argument is handed to hist().
hist(score,
     xlim = c(0, 110),
     ylim = c(0, 12))
EXAMPLE 4. Using histogram()
# histogram() comes from lattice, so load it with library(): a missing package
# then fails here with a clear error instead of a warning followed by
# "could not find function" further down.
library(lattice)
# NOTE(review): nothing in this snippet visibly uses openintro; it is kept as
# an optional load exactly as before -- confirm whether it is needed.
require(openintro)
score <- c(86, 80, 25, 77, 73, 76, 100, 90, 69, 93,
           90, 83, 70, 73, 73, 70, 90, 83, 71, 95,
           40, 58, 68, 69, 100, 78, 87, 97, 92, 73)
# type = "count" puts counts on the y axis; the class boundaries are fixed at
# 5, 15, ..., 105 via breaks.
histogram(score, type = "count",
          xlim = c(0, 110),
          ylim = c(0, 12),
          breaks = seq(5, 105, by = 10))
EXAMPLE 5. Histogram of iris
str(iris) # iris is a dataset
# Partition the graphics device into a 2-by-2 grid. par() returns the previous
# settings, which are kept so the layout can be restored at the end; otherwise
# every later base-graphics plot would still be drawn into the 2-by-2 grid.
old_par <- par(mfrow = c(2, 2))
# 1. Drawing Histograms (default titles and colours) for columns 1 to 4
for (k in 1:4) {
  hist(iris[[k]])
}
# 2. Redrawing the Histograms
# 2-1) Making Main Title of the Histogram (one title per column, then print)
title <- paste0("Histogram of ", colnames(iris[1:4])) ; title
# 2-2) Color (one fill colour per column, then print)
col <- c("yellow", "lightgreen", "lightpink", "skyblue"); col
# 2-3) Redrawing with the custom title, axis labels and colour
for (k in 1:4) {
  hist(iris[[k]],
       main = title[k],
       xlab = colnames(iris[k]),
       ylab = "Frequency",
       col = col[k])
}
# Restore the graphics layout that was active before this example.
par(old_par)
> # 2. Redrawing the Histograms
>
> # 2-1) Making Main Title of the Histogram
> title <- paste0("Histogram of ", colnames(iris[1:4])) ; title
## [1] "Histogram of Sepal.Length" "Histogram of Sepal.Width"
## [3] "Histogram of Petal.Length" "Histogram of Petal.Width"
>
> # 2-2) Color
> col <- c("yellow", "lightgreen", "lightpink", "skyblue"); col
## [1] "yellow" "lightgreen" "lightpink" "skyblue"
>
> # 2-3) Redrawing
> for (k in 1:4) hist(iris[[k]],
## + main=title[k],
## + xlab=colnames(iris[k]),
## + ylab="Frequency",
## + col = col[k])
> 
3. Relative Frequency Histogram
EXAMPLE 6. Relative Frequency Histogram of Example 3 using histogram()
# histogram() comes from lattice, so load it with library(): a missing package
# then fails here with a clear error instead of a warning followed by
# "could not find function" further down.
library(lattice)
# NOTE(review): nothing in this snippet visibly uses openintro; it is kept as
# an optional load exactly as before -- confirm whether it is needed.
require(openintro)
score <- c(86, 80, 25, 77, 73, 76, 100, 90, 69, 93,
           90, 83, 70, 73, 73, 70, 90, 83, 71, 95,
           40, 58, 68, 69, 100, 78, 87, 97, 92, 73)
# type = "percent" puts percentages (not counts) on the y axis; the class
# boundaries are the same 5, 15, ..., 105 as in the count histogram.
histogram(score, type = "percent",
          xlim = c(0, 110),
          ylim = c(0, 40),
          breaks = seq(5, 105, by = 10))

Note : y 축의 값이 갯수(count)가 아닌 백분율(percent)로 출력된다.
4. Sample size and Relative Frequency Histograms
sample size๊ฐ ์ปค์ง์ ๋ฐ๋ผ ์ ์ฒด ๋ชจ์์ ์ข์ฐ ๋์นญ์ ์ข ๋ชจ์์ด ๋๋ค.

5. A Very Fine Relative Frequency Histogram

6. Frequency Table
EXAMPLE 7. Using iris data set, Find the frequency table of the 2nd column(Sepal.Width) of iris.
library(Rstat)
# Load the iris data set and show its structure
data(iris)
str(iris)
# Take the 2nd column (Sepal.Width) as a plain vector
x <- iris[, 2]
# 1. Frequency table
freq.table(x)
# 2. Frequency table together with the yellow histogram
freq.table(x, mp = TRUE, col = 7)
# 3. Change the class interval to 0.5: build the cut points, print them,
#    then tabulate with them
mycut <- seq(2, 4.5, by = 0.5)
print(mycut)
freq.table(x, cut = mycut)
freq.table(x, cut=mycut, mp=TRUE, col=0)
> freq.table(x)
## Center Freq Cum-Fr Rel-Fr Rel-CFr
## (2, 2.2] 2.1 4 4 0.0267 0.0267
## (2.2, 2.4] 2.3 7 11 0.0467 0.0733
## (2.4, 2.6] 2.5 13 24 0.0867 0.1600
## (2.6, 2.8] 2.7 23 47 0.1533 0.3133
## (2.8, 3] 2.9 36 83 0.2400 0.5533
## (3, 3.2] 3.1 24 107 0.1600 0.7133
## (3.2, 3.4] 3.3 18 125 0.1200 0.8333
## (3.4, 3.6] 3.5 10 135 0.0667 0.9000
## (3.6, 3.8] 3.7 9 144 0.0600 0.9600
## (3.8, 4] 3.9 3 147 0.0200 0.9800
## (4, 4.2] 4.1 2 149 0.0133 0.9933
## (4.2, 4.4] 4.3 1 150 0.0067 1.0000
> freq.table(x, mp=TRUE, col=7)
Center Freq Cum-Fr Rel-Fr Rel-CFr
(2, 2.2] 2.1 4 4 0.0267 0.0267
(2.2, 2.4] 2.3 7 11 0.0467 0.0733
(2.4, 2.6] 2.5 13 24 0.0867 0.1600
(2.6, 2.8] 2.7 23 47 0.1533 0.3133
(2.8, 3] 2.9 36 83 0.2400 0.5533
(3, 3.2] 3.1 24 107 0.1600 0.7133
(3.2, 3.4] 3.3 18 125 0.1200 0.8333
(3.4, 3.6] 3.5 10 135 0.0667 0.9000
(3.6, 3.8] 3.7 9 144 0.0600 0.9600
(3.8, 4] 3.9 3 147 0.0200 0.9800
(4, 4.2] 4.1 2 149 0.0133 0.9933
(4.2, 4.4] 4.3 1 150 0.0067 1.0000
> freq.table(x, cut=mycut, mp=TRUE, col=4)
## Center Freq Cum-Fr Rel-Fr Rel-CFr
## (2, 2.5] 2.25 19 19 0.1267 0.1267
## (2.5, 3] 2.75 64 83 0.4267 0.5533
## (3, 3.5] 3.25 48 131 0.3200 0.8733
## (3.5, 4] 3.75 16 147 0.1067 0.9800
## (4, 4.5] 4.25 3 150 0.0200 1.0000
8. Unstable Histogram
Type-A : Isolated Island
Type-B : Multimodal
Type-C : Outliers
Type-D : Cliff
EXAMPLE 8. Unstable Histogram
library(Rstat)
# 1. Types of Unstable Histogram, drawn with the function's default arguments
unstable.hist() # refer to ch2.man(2)
# 2. Changing the Parameters of unstable.hist()
unstable.hist(
  N = 100,
  m2 = 4,
  a = 11,
  b = 12,
  c = 8,
  vc = rainbow(4)
)


See : Using Histograms to Understand Your Data
9. Contingency Table (Cross table)
EXAMPLE 9. Using exa2_2 data set, Find the table of each one.
Frequency table of the 2nd column
Frequency table of the 3rd column
Contingency table of the 2nd and the 3rd columns.
library(Rstat)
# data import
data(exa2_2) # exa2_2 is a dataset of Rstat
x <- exa2_2
str(x)
# 1. Frequency table of the 2nd Column
x2 <- x[[2]] ; x2 # x2 : factor variable
# Tabulate the 2nd column only. The original called table(x) on the whole
# data frame, which cross-tabulates every column instead of counting x2.
x21 <- table(x2) ; x21 # counts
x22 <- prop.table(x21) ; round(x22,2) # relative frequencies
x23 <- addmargins(x22) ; round(x23,2) # relative frequencies plus their sum
# 2. Frequency table of the 3rd column
x3 <- x[[3]] ; x3 # x3 : factor variable
x31 <- table(x3) ; x31 # counts
x32 <- prop.table(x31) ; round(x32,2) # relative frequencies
x33 <- addmargins(x32) ; round(x33,2) # relative frequencies plus their sum
# 3. Contingency table of the 2nd and the 3rd Columns
x41 <- table(x2, x3) ; x41 # joint counts
x42 <- prop.table(x41) ; round(x42,2) # joint relative frequencies
x43 <- addmargins(x42) ; round(x43,2)
> # 1. Frequency table of the 2nd Column
> x2 <- x[[2]] ; head(x2) # x2 : factor variable
## [1] 학생부종합 학생부종합 학생부종합 학생부교과 학생부교과 학생부종합
## Levels: 논술우선 정시일반 학생부교과 학생부종합
> x21 <- table(x2) ; x21 #
## x2
## 논술우선 정시일반 학생부교과 학생부종합
## 43 87 51 29
> x22 <- prop.table(x21) ; round(x22,2)
## x2
## 논술우선 정시일반 학생부교과 학생부종합
## 0.20 0.41 0.24 0.14
> x23 <- addmargins(x22) ; round(x23,2)
## x2
## 논술우선 정시일반 학생부교과 학생부종합 Sum
## 0.20 0.41 0.24 0.14 1.00
##
> # 2. Frequency table of the 3rd column
> x3 <- x[[3]] ; head(x3) # x3 : factor variable
## [1] 자율활동 교과활동 자율활동 교과활동 교과활동 진로활동
## Levels: 교과활동 동아리 봉사활동 자율활동 진로활동
> x31 <- table(x3) ; x31 #
## x3
## 교과활동 동아리 봉사활동 자율활동 진로활동
## 86 31 9 62 22
> x32 <- prop.table(x31) ; round(x32,2)
## x3
## 교과활동 동아리 봉사활동 자율활동 진로활동
## 0.41 0.15 0.04 0.30 0.10
> x33 <- addmargins(x32) ; round(x33,2)
## x3
## 교과활동 동아리 봉사활동 자율활동 진로활동 Sum
## 0.41 0.15 0.04 0.30 0.10 1.00
##
> # 3. Contingency table of the 2nd and the 3rd Columns
> x41 <- table(x2, x3) ; x41
## x3
## x2 교과활동 동아리 봉사활동 자율활동 진로활동
## 논술우선 22 6 1 8 6
## 정시일반 35 16 1 28 7
## 학생부교과 26 5 4 13 3
## 학생부종합 3 4 3 13 6
> x42 <- prop.table(x41) ; round(x42,2)
## x3
## x2 교과활동 동아리 봉사활동 자율활동 진로활동
## 논술우선 0.10 0.03 0.00 0.04 0.03
## 정시일반 0.17 0.08 0.00 0.13 0.03
## 학생부교과 0.12 0.02 0.02 0.06 0.01
## 학생부종합 0.01 0.02 0.01 0.06 0.03
> x43 <- addmargins(x42) ; round(x43,2)
## x3
## x2 교과활동 동아리 봉사활동 자율활동 진로활동 Sum
## 논술우선 0.10 0.03 0.00 0.04 0.03 0.20
## 정시일반 0.17 0.08 0.00 0.13 0.03 0.41
## 학생부교과 0.12 0.02 0.02 0.06 0.01 0.24
## 학생부종합 0.01 0.02 0.01 0.06 0.03 0.14
## Sum 0.41 0.15 0.04 0.30 0.10 1.00
> 1๏ผ้ขๆฐ(ๅๅธ)่กจ(frequency table)ใ 2๏ผ็ธๅฏนโ้ขๆฐ(ๅๅธ)่กจโ(relative frequency table)ใ 3๏ผ้ขๆฐๅๅธๅพ(frequency diagram)ใ 4๏ผ็ธๅฏน้ขๆฐๅๅธๅพ(relative frequency diagram)ใ 5๏ผ่ๅถๅพ(stem and leaf diagram)ใ 6๏ผๆ ๅฝขๅๆ่กจ(contingency table)ใ
Last updated
Was this helpful?