ggplot2绘制世界产油量地图

ggplot2作为Hadley大神最得意的作品之一,一经推出便受到了R社区的喜爱。事实证明,群众的眼睛是雪亮的,ggplot2真的是R语言中最优雅美观的绘图系统。目前关于ggplot2的扩展包也是琳琅满目,好像没有什么统计图是ggplot2不能画的😂。除了统计图之外,ggplot2还可以绘制地图,那叫一个漂亮😻。今天这篇博客就是专门讲用ggplot2绘制美观的地图。Talk is cheap, show me the code!!!😉

载入需要的包

1
2
3
4
library(ggplot2)
library(dplyr)
library(stringr)
library(rvest) # 从维基百科上抓取数据
1
#> Loading required package: xml2
1
2
3
4
5
6
library(magrittr)
# 原文作者这里加载了sf包,但其实没必要,因为没有用到`geom_sf`这一图层
# 一般绘制地图有两种方法:一是地理数据和其他变量数据的拼接;二是类似`geom_sf`这样的图层
# library(sf)
# library(scales)
# library(viridis)

从Wikipedia上抓取数据

1
2
3
web_url <- "https://en.wikipedia.org/wiki/List_of_countries_by_oil_production"
# 查看网页的编码信息
html_session(web_url)
1
2
3
4
#> <session> https://en.wikipedia.org/wiki/List_of_countries_by_oil_production
#> Status: 200
#> Type: text/html; charset=UTF-8
#> Size: 109498
1
2
3
4
5
df_oil <- read_html("https://en.wikipedia.org/wiki/List_of_countries_by_oil_production") %>%
html_nodes("table") %>%
.[[1]] %>%
html_table()
head(df_oil)
1
2
3
4
5
6
7
#> Country Oil Production(bbl/day, 2016)[1]
#> 1 1 Russia 10,551,497
#> 2 2 Saudi Arabia (OPEC) 10,460,710
#> 3 3 United States 8,875,817
#> 4 4 Iraq (OPEC) 4,451,516
#> 5 5 Iran (OPEC) 3,990,956
#> 6 6 China 3,980,650

重命名列名称

1
2
3
colnames(df_oil) <- c("rank", "country", "oil_bbl_per_day")
df_oil %<>%
dplyr::as_tibble()

将变量映射到几何形状上

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# 将产量排名转化为`integer`
df_oil2 <- df_oil %>%
mutate(rank = as.integer(rank))
df_oil2
#> # A tibble: 98 x 3
#> rank country oil_bbl_per_day
#> <int> <chr> <chr>
#> 1 1 Russia 10,551,497
#> 2 2 Saudi Arabia (OPEC) 10,460,710
#> 3 3 United States 8,875,817
#> 4 4 Iraq (OPEC) 4,451,516
#> 5 5 Iran (OPEC) 3,990,956
#> 6 6 China 3,980,650
#> 7 7 Canada 3,662,694
#> 8 8 United Arab Emirates (OPEC) 3,106,077
#> 9 9 Kuwait (OPEC) 2,923,825
#> 10 10 Brazil 2,515,459
#> # ... with 88 more rows
# 将石油日均产量转化为`integer`
df_oil3 <- df_oil2 %>%
mutate(oil_bbl_per_day = oil_bbl_per_day %>%
str_replace_all(",", "") %>%
as.integer())
df_oil3
#> # A tibble: 98 x 3
#> rank country oil_bbl_per_day
#> <int> <chr> <int>
#> 1 1 Russia 10551497
#> 2 2 Saudi Arabia (OPEC) 10460710
#> 3 3 United States 8875817
#> 4 4 Iraq (OPEC) 4451516
#> 5 5 Iran (OPEC) 3990956
#> 6 6 China 3980650
#> 7 7 Canada 3662694
#> 8 8 United Arab Emirates (OPEC) 3106077
#> 9 9 Kuwait (OPEC) 2923825
#> 10 10 Brazil 2515459
#> # ... with 88 more rows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# 新创建一个变量,'opec_ind',表示哪些国家是`OPEC`成员国
df_oil4 <- df_oil3 %>%
mutate(opec_ind = if_else(str_detect(country, "OPEC"), 1, 0))
df_oil4
#> # A tibble: 98 x 4
#> rank country oil_bbl_per_day opec_ind
#> <int> <chr> <int> <dbl>
#> 1 1 Russia 10551497 0
#> 2 2 Saudi Arabia (OPEC) 10460710 1.00
#> 3 3 United States 8875817 0
#> 4 4 Iraq (OPEC) 4451516 1.00
#> 5 5 Iran (OPEC) 3990956 1.00
#> 6 6 China 3980650 0
#> 7 7 Canada 3662694 0
#> 8 8 United Arab Emirates (OPEC) 3106077 1.00
#> 9 9 Kuwait (OPEC) 2923825 1.00
#> 10 10 Brazil 2515459 0
#> # ... with 88 more rows
# 对变量`country`进行tidy
# - 可以看到,在`country`列一些国家被标注了`OPEC`
# - 因为后面需要和地图数据进行连接,所以需要将`OPEC`这一标注去掉
df_oil5 <- df_oil4 %>%
mutate(country = country %>%
str_replace(" \\(OPEC\\)", "") %>%
str_replace("\\s{2,}", " "))
df_oil5
#> # A tibble: 98 x 4
#> rank country oil_bbl_per_day opec_ind
#> <int> <chr> <int> <dbl>
#> 1 1 Russia 10551497 0
#> 2 2 Saudi Arabia 10460710 1.00
#> 3 3 United States 8875817 0
#> 4 4 Iraq 4451516 1.00
#> 5 5 Iran 3990956 1.00
#> 6 6 China 3980650 0
#> 7 7 Canada 3662694 0
#> 8 8 United Arab Emirates 3106077 1.00
#> 9 9 Kuwait 2923825 1.00
#> 10 10 Brazil 2515459 0
#> # ... with 88 more rows
# 手动检查前面的代码是否出错
df_oil5 %>%
filter(opec_ind == 1) %>%
select(country)
#> # A tibble: 14 x 1
#> country
#> <chr>
#> 1 Saudi Arabia
#> 2 Iraq
#> 3 Iran
#> 4 United Arab Emirates
#> 5 Kuwait
#> 6 Venezuela
#> 7 Nigeria
#> 8 Angola
#> 9 Qatar
#> 10 Algeria
#> 11 Ecuador
#> 12 Libya
#> 13 Equatorial Guinea
#> 14 Gabon
1
2
3
4
5
# 对变量进行重编码,`dplyr::recode`函数
df_oil6 <- df_oil5 %>%
select(rank, country, opec_ind, oil_bbl_per_day)
df_oil6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#> # A tibble: 98 x 4
#> rank country opec_ind oil_bbl_per_day
#> <int> <chr> <dbl> <int>
#> 1 1 Russia 0 10551497
#> 2 2 Saudi Arabia 1.00 10460710
#> 3 3 United States 0 8875817
#> 4 4 Iraq 1.00 4451516
#> 5 5 Iran 1.00 3990956
#> 6 6 China 0 3980650
#> 7 7 Canada 0 3662694
#> 8 8 United Arab Emirates 1.00 3106077
#> 9 9 Kuwait 1.00 2923825
#> 10 10 Brazil 0 2515459
#> # ... with 88 more rows
1
2
3
4
# 获取地图数据
map_world <- map_data("world") # 来自`maps`包
map_world %<>%
as_tibble()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# 对地图数据进行重编码
map_world %>%
group_by(region) %>%
summarise() %>%
print(n = 6)
#> # A tibble: 252 x 1
#> region
#> <chr>
#> 1 Afghanistan
#> 2 Albania
#> 3 Algeria
#> 4 American Samoa
#> 5 Andorra
#> 6 Angola
#> # ... with 246 more rows
df_oil7 <- df_oil6 %>%
mutate(country = recode(
country, `United States` = "USA",
`United Kingdom` = "UK",
`Congo, Democratic Republic of the` = "Democratic Republic of the Congo",
`Trinidad and Tobago` = "Trinidad",
`Sudan and South Sudan` = "Sudan",
# `Sudan and South Sudan` = 'South Sudan',
`Congo, Republic of the` = "Republic of Congo"
))
df_oil7
#> # A tibble: 98 x 4
#> rank country opec_ind oil_bbl_per_day
#> <int> <chr> <dbl> <int>
#> 1 1 Russia 0 10551497
#> 2 2 Saudi Arabia 1.00 10460710
#> 3 3 USA 0 8875817
#> 4 4 Iraq 1.00 4451516
#> 5 5 Iran 1.00 3990956
#> 6 6 China 0 3980650
#> 7 7 Canada 0 3662694
#> 8 8 United Arab Emirates 1.00 3106077
#> 9 9 Kuwait 1.00 2923825
#> 10 10 Brazil 0 2515459
#> # ... with 88 more rows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# 将地图数据和石油产量数据进行连接
map_world
#> # A tibble: 99,338 x 6
#> long lat group order region subregion
#> * <dbl> <dbl> <dbl> <int> <chr> <chr>
#> 1 -69.9 12.5 1.00 1 Aruba <NA>
#> 2 -69.9 12.4 1.00 2 Aruba <NA>
#> 3 -69.9 12.4 1.00 3 Aruba <NA>
#> 4 -70.0 12.5 1.00 4 Aruba <NA>
#> 5 -70.1 12.5 1.00 5 Aruba <NA>
#> 6 -70.1 12.6 1.00 6 Aruba <NA>
#> 7 -70.0 12.6 1.00 7 Aruba <NA>
#> 8 -70.0 12.6 1.00 8 Aruba <NA>
#> 9 -69.9 12.5 1.00 9 Aruba <NA>
#> 10 -69.9 12.5 1.00 10 Aruba <NA>
#> # ... with 99,328 more rows
map_oil <- left_join(map_world, df_oil7, by = c("region" = "country"))
map_oil
#> # A tibble: 99,338 x 9
#> long lat group order region subregion rank opec_ind oil_bbl_per_day
#> <dbl> <dbl> <dbl> <int> <chr> <chr> <int> <dbl> <int>
#> 1 -69.9 12.5 1.00 1 Aruba <NA> NA NA NA
#> 2 -69.9 12.4 1.00 2 Aruba <NA> NA NA NA
#> 3 -69.9 12.4 1.00 3 Aruba <NA> NA NA NA
#> 4 -70.0 12.5 1.00 4 Aruba <NA> NA NA NA
#> 5 -70.1 12.5 1.00 5 Aruba <NA> NA NA NA
#> 6 -70.1 12.6 1.00 6 Aruba <NA> NA NA NA
#> 7 -70.0 12.6 1.00 7 Aruba <NA> NA NA NA
#> 8 -70.0 12.6 1.00 8 Aruba <NA> NA NA NA
#> 9 -69.9 12.5 1.00 9 Aruba <NA> NA NA NA
#> 10 -69.9 12.5 1.00 10 Aruba <NA> NA NA NA
#> # ... with 99,328 more rows
# 石油日均产量大于822675桶的国家的日均产量
df_oil7 %>%
filter(oil_bbl_per_day > 822675) %>%
summarise(mean(oil_bbl_per_day))
#> # A tibble: 1 x 1
#> `mean(oil_bbl_per_day)`
#> <dbl>
#> 1 3190373
# 石油日均产量小于822675桶的国家的日均产量
df_oil7 %>%
filter(oil_bbl_per_day < 822675) %>%
summarise(mean(oil_bbl_per_day))
#> # A tibble: 1 x 1
#> `mean(oil_bbl_per_day)`
#> <dbl>
#> 1 96581

绘图

1
2
3
# 未加任何修饰的图
ggplot(map_oil, aes(x = long, y = lat, group = group)) +
geom_polygon(aes(fill = oil_bbl_per_day))

1
2
# 美化后的地图
map_oil
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#> # A tibble: 99,338 x 9
#> long lat group order region subregion rank opec_ind oil_bbl_per_day
#> <dbl> <dbl> <dbl> <int> <chr> <chr> <int> <dbl> <int>
#> 1 -69.9 12.5 1.00 1 Aruba <NA> NA NA NA
#> 2 -69.9 12.4 1.00 2 Aruba <NA> NA NA NA
#> 3 -69.9 12.4 1.00 3 Aruba <NA> NA NA NA
#> 4 -70.0 12.5 1.00 4 Aruba <NA> NA NA NA
#> 5 -70.1 12.5 1.00 5 Aruba <NA> NA NA NA
#> 6 -70.1 12.6 1.00 6 Aruba <NA> NA NA NA
#> 7 -70.0 12.6 1.00 7 Aruba <NA> NA NA NA
#> 8 -70.0 12.6 1.00 8 Aruba <NA> NA NA NA
#> 9 -69.9 12.5 1.00 9 Aruba <NA> NA NA NA
#> 10 -69.9 12.5 1.00 10 Aruba <NA> NA NA NA
#> # ... with 99,328 more rows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
ggplot(map_oil, aes(x = long, y = lat, group = group)) +
# 几何多边形图层
geom_polygon(aes(fill = oil_bbl_per_day)) +
# 地图的填充色, 来自viridis包
scale_fill_gradientn(
colours = c("#461863", "#404E88", "#2A8A8C", "#7FD157", "#F9E53F"),
values = scales::rescale(c(100, 96581, 822675, 3190373, 10000000)),
labels = scales::comma,
breaks = c(100, 96581, 822675, 3190373, 10000000)
) +
guides(fill = guide_legend(reverse = T)) +
labs(
# 填充图例的名称
fill = "bbl/day",
title = "Oil Production by Country",
subtitle = "Barrels per day, 2016",
x = NULL,
y = NULL
) +
theme(
text = element_text(family = "Gill Sans", color = "#EEEEEE"),
plot.title = element_text(size = 28),
plot.subtitle = element_text(size = 14),
axis.ticks = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank(),
panel.background = element_rect(fill = "#333333"),
plot.background = element_rect(fill = "#333333"),
legend.position = c(.18, .36),
legend.background = element_blank(),
legend.key = element_blank()
) +
annotate(
geom = "text",
label = "Source: U.S. Energy Information Administration\nhttps://en.wikipedia.org/wiki/List_of_countries_by_oil_production",
x = 18, y = -55,
size = 3,
family = "Gill Sans",
color = "#CCCCCC",
hjust = "left"
) ->
oil_map_plot
oil_map_plot

1
2
# plotly包绘制的动态图
plotly::ggplotly(oil_map_plot)
1
2
#> We recommend that you use the dev version of ggplot2 with `ggplotly()`
#> Install it with: `devtools::install_github('hadley/ggplot2')`

参考