用ggplot2绘制美观的柱状图

实习的时候看完了一本讲解ggplot2包的书,名字叫做《Guide to Create Beautiful Graphics in R》。
老实说,这本书虽然有很多例子,但讲得很浅,也许这本书的定位就是参考手册吧。^_^… 看完了里面柱状图的内容,感觉配色不是很美观。
于是乎自己动手绘制了以下两幅柱状图,其中配色方案来自The Pudding。
感觉这个博客图像的配色方案让人赏心悦目,自己就仿照着也画一画 ^_^…

图1,单因子映射

1
2
library(janeaustenr)
library(tidyverse)
1
#> Warning: package 'tibble' was built under R version 3.4.1
1
#> Warning: package 'purrr' was built under R version 3.4.1
1
2
3
4
5
6
7
8
9
10
11
12
library(tidytext)
# Split every Austen novel into bigrams and keep the top 10 by frequency.
# A token is a meaningful unit of text, most often a word,
# that we are interested in using for further analysis,
# and tokenization is the process of splitting text into tokens.
top_10_austen_bigrams <- austen_books() %>%
  select(book, text) %>% # name the columns instead of relying on position
  unnest_tokens(word, text, token = "ngrams", n = 2) %>%
  print() %>% # show the tokenized table before it is aggregated
  count(word, sort = TRUE) %>%
  top_n(10, n) # rank explicitly by the count column `n`
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#> # A tibble: 725,049 x 2
#> book word
#> <fctr> <chr>
#> 1 Sense & Sensibility sense and
#> 2 Sense & Sensibility and sensibility
#> 3 Sense & Sensibility sensibility by
#> 4 Sense & Sensibility by jane
#> 5 Sense & Sensibility jane austen
#> 6 Sense & Sensibility austen 1811
#> 7 Sense & Sensibility 1811 chapter
#> 8 Sense & Sensibility chapter 1
#> 9 Sense & Sensibility 1 the
#> 10 Sense & Sensibility the family
#> # ... with 725,039 more rows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
library(ggthemes)
## Single-variable mapping: one bar per bigram, bar length = count
top_10_austen_bigrams %>%
  print() %>% # show the table that is being plotted
  ggplot(aes(x = reorder(word, n), y = n)) +
  geom_col(fill = "#E7962A") + # bars filled with orange
  coord_flip() +
  theme_fivethirtyeight() +
  theme(
    # panel and plot background share the same earthy-yellow fill
    panel.background = element_rect(fill = "#FCF0E1"),
    plot.background = element_rect(fill = "#FCF0E1")
  ) +
  geom_text(aes(label = n), hjust = -0.3) + # hjust/vjust shift the labels
  # Titles. labs() names the axis titles `x` and `y`; `xlab`/`ylab` are
  # separate helper functions, not argument names of labs().
  labs(
    title = "Words Frequency Count",
    subtitle = "Made in 2017-08-25,by Leo Lee",
    caption = "Source: janeaustenr package",
    x = "", y = ""
  )
1
2
3
4
5
6
7
8
9
10
11
12
13
#> # A tibble: 10 x 2
#> word n
#> <chr> <int>
#> 1 of the 3017
#> 2 to be 2787
#> 3 in the 2368
#> 4 it was 1781
#> 5 i am 1545
#> 6 she had 1472
#> 7 of her 1445
#> 8 to the 1387
#> 9 she was 1377
#> 10 had been 1299

barplot1

图2,双因子映射

1
2
3
4
5
6
7
8
9
10
11
12
# Count single words (stop words removed) in two novels and keep the top 15
# per book. `n2` flips the counts of "Sense & Sensibility" to negative values
# so the two books can be drawn on opposite sides of a shared axis.
austen_2_books <- austen_books() %>%
  select(book, text) %>% # name the columns instead of relying on position
  filter(book %in% c("Sense & Sensibility", "Pride & Prejudice")) %>%
  unnest_tokens(word, text, token = "ngrams", n = 1) %>%
  anti_join(stop_words, by = "word") %>%
  print() %>% # show the tokenized table before it is aggregated
  group_by(book) %>% # grouping makes top_n() rank within each book
  count(book, word, sort = TRUE) %>%
  top_n(15, n) %>% # rank explicitly by the count column `n`
  ungroup() %>%
  mutate(n2 = if_else(book == "Sense & Sensibility", -n, n))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#> # A tibble: 73,576 x 2
#> book word
#> <fctr> <chr>
#> 1 Sense & Sensibility rival
#> 2 Sense & Sensibility rival
#> 3 Pride & Prejudice rival
#> 4 Pride & Prejudice _near_
#> 5 Pride & Prejudice _near_
#> 6 Sense & Sensibility growing
#> 7 Sense & Sensibility growing
#> 8 Sense & Sensibility growing
#> 9 Sense & Sensibility growing
#> 10 Sense & Sensibility growing
#> # ... with 73,566 more rows
1
# Inspect the per-book word counts, including the signed `n2` column.
austen_2_books %>% print()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#> # A tibble: 30 x 4
#> book word n n2
#> <fctr> <chr> <int> <int>
#> 1 Sense & Sensibility elinor 623 -623
#> 2 Pride & Prejudice elizabeth 597 597
#> 3 Sense & Sensibility marianne 492 -492
#> 4 Pride & Prejudice darcy 373 373
#> 5 Pride & Prejudice bennet 294 294
#> 6 Pride & Prejudice miss 283 283
#> 7 Pride & Prejudice jane 264 264
#> 8 Pride & Prejudice bingley 257 257
#> 9 Sense & Sensibility time 239 -239
#> 10 Sense & Sensibility dashwood 231 -231
#> # ... with 20 more rows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Two-variable mapping: word on the axis, signed count `n2` as bar length,
# book as fill colour, so the two novels extend in opposite directions.
austen_2_books %>%
  ggplot(aes(x = reorder(word, n2), y = n2, fill = book)) +
  geom_col() +
  theme_fivethirtyeight() +
  theme(
    # panel and plot background share the same earthy-yellow fill
    panel.background = element_rect(fill = "#FCF0E1"),
    plot.background = element_rect(fill = "#FCF0E1"),
    legend.background = element_blank()
  ) +
  # Titles. labs() names the axis titles `x` and `y`; `xlab`/`ylab` are
  # separate helper functions, not argument names of labs().
  labs(
    title = "Words Frequency Count 2",
    subtitle = "Made in 2017-08-25,by Leo Lee",
    caption = "Source: janeaustenr package",
    x = "", y = "",
    fill = ""
  ) +
  scale_fill_manual(values = c("#F26B68", "#0991DB")) +
  scale_y_continuous(position = "right") +
  coord_flip() +
  # One label layer per book so each side gets its own horizontal offset.
  # x and y are inherited from the plot-level mapping, so only `label`
  # needs to be mapped here.
  geom_text(
    data = filter(austen_2_books, book == "Sense & Sensibility"),
    aes(label = n), hjust = 1.5
  ) +
  geom_text(
    data = filter(austen_2_books, book != "Sense & Sensibility"),
    aes(label = n), hjust = -0.5
  )

barplot2