tibbletime包:时序数据处理的倚天剑

tibbletimeBusiness Science团队推出的一款与tidyverse框架完美契合的时序数据包。tibbletime提供了一种新的时序数据类型——tbl_time,其继承了R中数据框的data.frame类以及tibble包中的tbl_df类,与dplyr这一数据处理利器完美契合。同时,tibbletime还提供了一系列强大的函数用以处理tbl_time类型的数据。
tibbletime包的作者这样介绍其优势:

  • filter_time()函数:简洁的切片操作
  • collapse_index()函数:根据时间(例如年、月、每两周等)对索引列进行分割,进而利用dplyr包的分组函数计算汇总数据
  • as_period()函数:改变一个time tibble的时间间隔,让日数据变为周数据/月数据更为简单
  • rolliyf()函数:滚动分析
  • create_series()函数:快捷地创建tbl_time类型的时序数据
  • 索引列完美支持DatePOSIXct类型,初步支持yearmonyearqtrhms类型。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
filter_time
#> function (.tbl_time, time_formula)
#> {
#> UseMethod("filter_time")
#> }
#> <environment: namespace:tibbletime>
collapse_index
#> function (index, period = "yearly", start_date = NULL, side = "end",
#> ...)
#> {
#> index_part <- partition_index(index, period, start_date)
#> index_num <- to_posixct_numeric(index)
#> if (side == "start") {
#> pos <- match(unique(index_part), index_part)
#> index_at_pos <- index_num[pos]
#> reps <- diff(c(pos, length(index_part) + 1))
#> }
#> else if (side == "end") {
#> pos <- length(index_part) - match(unique(index_part),
#> rev(index_part)) + 1
#> index_at_pos <- index_num[pos]
#> reps <- diff(c(0, pos))
#> }
#> new_index <- posixct_numeric_to_datetime(x = rep(index_at_pos,
#> reps), class = get_index_col_class(index), tz = get_index_col_time_zone(index))
#> new_index
#> }
#> <environment: namespace:tibbletime>
as_period
#> function (.tbl_time, period = "yearly", start_date = NULL, side = "start",
#> include_endpoints = FALSE, ...)
#> {
#> UseMethod("as_period")
#> }
#> <environment: namespace:tibbletime>
rollify
#> function (.f, window = 1, unlist = TRUE, na_value = NULL)
#> {
#> .f <- purrr::as_mapper(.f)
#> function(...) {
#> roller(..., .f = .f, window = window, unlist = unlist,
#> na_value = na_value)
#> }
#> }
#> <environment: namespace:tibbletime>
create_series
#> function (time_formula, period = "daily", class = "POSIXct",
#> include_end = FALSE, tz = "UTC", as_vector = FALSE)
#> {
#> period_list <- parse_period(period)
#> dummy_index <- make_dummy_dispatch_obj(class)
#> assert_allowed_datetime(dummy_index)
#> assert_period_matches_index_class(dummy_index, period_list$period)
#> seq_fun <- lookup_seq_fun(dummy_index)
#> tf_list <- parse_time_formula(dummy_index, time_formula)
#> from_to <- purrr::map(tf_list, ~list_to_datetime(dummy_index,
#> .x, tz = tz))
#> from <- from_to[[1]]
#> to <- from_to[[2]]
#> by <- paste(period_list$freq, period_list$period)
#> assert_from_before_to(from, to)
#> date_seq <- seq_fun(from, to, by = by)
#> if (include_end) {
#> if (max(date_seq) < to) {
#> date_seq <- push_datetime(date_seq, to)
#> }
#> }
#> if (as_vector) {
#> date_seq
#> }
#> else {
#> as_tbl_time(tibble::tibble(date = date_seq), date)
#> }
#> }
#> <environment: namespace:tibbletime>

下面将通过实例来展示tibbletime的强大。

1
2
3
4
5
## 安装
# 正式版
install.packages("tibbletime")
# 开发板
devtools::install_github("business-science/tibbletime")

tibbletime操作

1
2
3
4
5
6
7
8
9
10
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tibbletime)
1
2
3
4
5
6
7
8
9
# ls(getNamespace("tibbletime"))
ls("package:tibbletime")
#> [1] "%>%" "as_period" "as_tbl_time"
#> [4] "ceiling_index" "collapse_index" "create_series"
#> [7] "filter" "filter_time" "floor_index"
#> [10] "get_index_char" "get_index_class" "get_index_col"
#> [13] "get_index_quo" "get_index_time_zone" "parse_period"
#> [16] "partition_index" "reconstruct" "rollify"
#> [19] "tbl_time"

将常规的数据框转化为tbl_time

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Facebook stock prices. Comes with the package
data("FB")
FB
#> # A tibble: 1,008 x 8
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 998 more rows
class(FB)
#> [1] "tbl_df" "tbl" "data.frame"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
FB_tbl_time <- FB %>%
as_tbl_time(index = date)
FB_tbl_time
#> # A time tibble: 1,008 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 998 more rows
class(FB_tbl_time)
#> [1] "tbl_time" "tbl_df" "tbl" "data.frame"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
my_df <- data.frame(x1 = Sys.time() + 1:20,
x2 = letters[1:20],
x3 = round(rnorm(20, mean = 3), 4))
head(my_df)
#> x1 x2 x3
#> 1 2018-01-06 10:28:58 a 3.4440
#> 2 2018-01-06 10:28:59 b 4.3121
#> 3 2018-01-06 10:29:00 c 2.5993
#> 4 2018-01-06 10:29:01 d 3.0153
#> 5 2018-01-06 10:29:02 e 3.1968
#> 6 2018-01-06 10:29:03 f 1.5981
my_df %>%
as_tbl_time(index = x1) %>%
head(n = 4)
#> # A time tibble: 4 x 3
#> # Index: x1
#> x1 x2 x3
#> <dttm> <fctr> <dbl>
#> 1 2018-01-06 10:28:58 a 3.44
#> 2 2018-01-06 10:28:59 b 4.31
#> 3 2018-01-06 10:29:00 c 2.60
#> 4 2018-01-06 10:29:01 d 3.02

tbl_time对象切片

filter_time()函数

1
filter_time
1
2
3
4
5
#> function (.tbl_time, time_formula)
#> {
#> UseMethod("filter_time")
#> }
#> <environment: namespace:tibbletime>
1
2
3
# 提取2013年3月至2015年的数据
FB_tbl_time %>%
filter_time(time_formula = '2013-03' ~ '2015')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 716 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-03-01 27.0 28.1 26.8 27.8 54064800 27.8
#> 2 FB 2013-03-04 27.8 28.1 27.4 27.7 32400700 27.7
#> 3 FB 2013-03-05 27.9 28.2 27.2 27.5 40622200 27.5
#> 4 FB 2013-03-06 28.1 28.1 27.4 27.5 33532600 27.5
#> 5 FB 2013-03-07 27.6 28.7 27.5 28.6 74540200 28.6
#> 6 FB 2013-03-08 28.4 28.5 27.7 28.0 44198900 28.0
#> 7 FB 2013-03-11 28.0 28.6 27.8 28.1 35642100 28.1
#> 8 FB 2013-03-12 28.1 28.3 27.6 27.8 27569600 27.8
#> 9 FB 2013-03-13 27.6 27.6 26.9 27.1 39619500 27.1
#> 10 FB 2013-03-14 27.1 27.4 26.8 27.0 27646400 27.0
#> # ... with 706 more rows
1
2
3
# 提取2013年1月2日至2013年12月31日的数据
FB_tbl_time %>%
filter_time(time_formula = "2013-01-02" ~ "2013-12-31")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 252 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 242 more rows
1
# 注:这里`~`两边的时间/日期都包含在内

利用dplyr独立完成这一操作的代码如下:

1
2
FB_tbl_time %>%
filter(date >= as.Date("2013-01-02"), date <= as.Date("2013-12-31"))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 252 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 242 more rows

可以看到,代码量增加了很多。

1
2
3
# 提取2013年1月至2014年2月的数据
FB_tbl_time %>%
filter_time("2013-01" ~ "2014-02")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 292 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 282 more rows

当然,利用tibbletime代码还可以进一步的缩减。

1
2
3
# 提取2013年一整年的数据
FB_tbl_time %>%
filter_time(time_formula = "2013" ~ "2013")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 252 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 242 more rows

还长了,再缩减一下

1
2
3
# 提取2013年一整年的数据
FB_tbl_time %>%
filter_time(time_formula = ~ "2013")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 252 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 242 more rows
1
2
3
# 提取2015年3月份的数据
FB_tbl_time %>%
filter_time(time_formula = ~ "2015-03")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 22 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2015-03-02 79.0 79.9 78.5 79.8 21662500 79.8
#> 2 FB 2015-03-03 79.6 79.7 78.5 79.6 18635000 79.6
#> 3 FB 2015-03-04 79.3 81.2 78.8 80.9 28126700 80.9
#> 4 FB 2015-03-05 81.2 82.0 81.1 81.2 27825700 81.2
#> 5 FB 2015-03-06 80.9 81.3 79.8 80.0 24488600 80.0
#> 6 FB 2015-03-09 79.7 79.9 78.6 79.4 18925100 79.4
#> 7 FB 2015-03-10 78.5 79.3 77.6 77.6 23067100 77.6
#> 8 FB 2015-03-11 77.8 78.4 77.3 77.6 20215700 77.6
#> 9 FB 2015-03-12 78.1 79.1 77.9 78.9 16093300 78.9
#> 10 FB 2015-03-13 78.6 79.4 77.7 78.1 18557300 78.1
#> # ... with 12 more rows

两个关键词参数

  • start
  • end
1
2
3
# 提取从序列开始到2015年的所有数据,包含2015年
FB_tbl_time %>%
filter_time(time_formula = "start" ~ "2015")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 756 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 746 more rows
1
2
3
# 提取从2015年到序列结束的所有数据,包含2015年
FB_tbl_time %>%
filter_time(time_formula = "2015" ~ "end")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 504 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2015-01-02 78.6 78.9 77.7 78.4 18177500 78.4
#> 2 FB 2015-01-05 78.0 79.2 76.9 77.2 26452200 77.2
#> 3 FB 2015-01-06 77.2 77.6 75.4 76.2 27399300 76.2
#> 4 FB 2015-01-07 76.8 77.4 75.8 76.2 22045300 76.2
#> 5 FB 2015-01-08 76.7 78.2 76.1 78.2 23961000 78.2
#> 6 FB 2015-01-09 78.2 78.6 77.2 77.7 21157000 77.7
#> 7 FB 2015-01-12 77.8 78.0 76.2 76.7 19190200 76.7
#> 8 FB 2015-01-13 77.2 78.1 75.8 76.4 25179600 76.4
#> 9 FB 2015-01-14 76.4 77.2 76.0 76.3 25741000 76.3
#> 10 FB 2015-01-15 76.4 76.6 73.5 74.1 34134000 74.1
#> # ... with 494 more rows

对分组后的tbl_time对象进行取子集操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Facebook, Amazon, Netify, Google
data("FANG")
FANG
#> # A tibble: 4,032 x 8
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 4,022 more rows
FANG %>%
as_tbl_time(index = date) %>%
group_by(symbol) %>%
filter_time(time_formula = '2013-01-01' ~ '2013-01-04')
#> # A time tibble: 12 x 8
#> # Index: date
#> # Groups: symbol [4]
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 AMZN 2013-01-02 256 258 253 257 3271000 257
#> 5 AMZN 2013-01-03 257 261 256 258 2750900 258
#> 6 AMZN 2013-01-04 258 260 257 259 1874200 259
#> 7 NFLX 2013-01-02 95.2 95.8 90.7 92.0 19431300 13.1
#> 8 NFLX 2013-01-03 92.0 97.9 91.5 96.6 27912500 13.8
#> 9 NFLX 2013-01-04 96.5 97.7 95.5 96.0 17761100 13.7
#> 10 GOOG 2013-01-02 719 727 717 723 5101500 361
#> 11 GOOG 2013-01-03 725 732 721 724 4653700 361
#> 12 GOOG 2013-01-04 729 741 728 738 5547600 369

根据分钟/秒来提取子集

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
my_df <- tibble::tibble(
date = lubridate::as_datetime(c("2018-01-05 20:54:16",
"2018-01-05 20:54:18",
"2018-01-05 20:55:54",
"2018-01-05 20:54:24",
"2018-01-05 20:55:44",
"2018-01-05 20:54:34")),
x2 = rnorm(6)
)
my_df
#> # A tibble: 6 x 2
#> date x2
#> <dttm> <dbl>
#> 1 2018-01-05 20:54:16 -0.343
#> 2 2018-01-05 20:54:18 -0.194
#> 3 2018-01-05 20:55:54 0.402
#> 4 2018-01-05 20:54:24 -0.753
#> 5 2018-01-05 20:55:44 -0.294
#> 6 2018-01-05 20:54:34 -1.97
my_df %>%
arrange(date) %>%
as_tbl_time(index = date) %>%
filter_time(time_formula = "2018-01-05 + 20:54:16" ~ "2018-01-05 + 20:54:44")
#> # A time tibble: 4 x 2
#> # Index: date
#> date x2
#> <dttm> <dbl>
#> 1 2018-01-05 20:54:16 -0.343
#> 2 2018-01-05 20:54:18 -0.194
#> 3 2018-01-05 20:54:24 -0.753
#> 4 2018-01-05 20:54:34 -1.97

除了使用filter_time()函数进行取子集操作外,tibbletime还支持常规的[操作。

1
FB_tbl_time[~ "2013"]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 252 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 242 more rows
1
FB_tbl_time["2013" ~ "2014-02", c(1, 2, 3)]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 292 x 3
#> # Index: date
#> symbol date open
#> <chr> <date> <dbl>
#> 1 FB 2013-01-02 27.4
#> 2 FB 2013-01-03 27.9
#> 3 FB 2013-01-04 28.0
#> 4 FB 2013-01-07 28.7
#> 5 FB 2013-01-08 29.5
#> 6 FB 2013-01-09 29.7
#> 7 FB 2013-01-10 30.6
#> 8 FB 2013-01-11 31.3
#> 9 FB 2013-01-14 32.1
#> 10 FB 2013-01-15 30.6
#> # ... with 282 more rows

利用filter_time()函数进行编程

1
2
date_var <- as.Date("2014-01-01")
filter_time(FB_tbl_time, 'start' ~ date_var)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 252 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 242 more rows

tbl_time对象进行高频转低频

as_period()函数

1
as_period
1
2
3
4
5
6
#> function (.tbl_time, period = "yearly", start_date = NULL, side = "start",
#> include_endpoints = FALSE, ...)
#> {
#> UseMethod("as_period")
#> }
#> <environment: namespace:tibbletime>
1
2
3
# 将日数据转化为周数据
FB_tbl_time %>%
as_period(period = "weekly", side = "start")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 209 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 3 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 4 FB 2013-01-22 29.8 30.9 29.7 30.7 55243300 30.7
#> 5 FB 2013-01-28 31.9 32.5 31.8 32.5 59682500 32.5
#> 6 FB 2013-02-04 29.1 29.2 28.0 28.1 92362200 28.1
#> 7 FB 2013-02-11 28.6 28.7 28.0 28.3 37361800 28.3
#> 8 FB 2013-02-19 28.2 29.1 28.1 28.9 49396400 28.9
#> 9 FB 2013-02-25 27.2 27.6 27.2 27.3 34652000 27.3
#> 10 FB 2013-03-04 27.8 28.1 27.4 27.7 32400700 27.7
#> # ... with 199 more rows
1
2
# side = "start"表示取每一个时间区间的第一个值
# 这里是取每周的第一个值
1
2
3
# 将日数据转化为月数据
FB_tbl_time %>%
as_period("monthly", side = "end")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 48 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-31 29.2 31.5 28.7 31.0 190744900 31.0
#> 2 FB 2013-02-28 26.8 27.3 26.3 27.2 83027800 27.2
#> 3 FB 2013-03-28 26.1 26.2 25.5 25.6 28585700 25.6
#> 4 FB 2013-04-30 27.1 27.8 27.0 27.8 36245700 27.8
#> 5 FB 2013-05-31 24.6 25.0 24.3 24.4 35925000 24.4
#> 6 FB 2013-06-28 24.7 25.0 24.4 24.9 96778900 24.9
#> 7 FB 2013-07-31 38.0 38.3 36.3 36.8 154828700 36.8
#> 8 FB 2013-08-30 42.0 42.3 41.1 41.3 67735100 41.3
#> 9 FB 2013-09-30 50.1 51.6 49.8 50.2 100095000 50.2
#> 10 FB 2013-10-31 47.2 52.0 46.5 50.2 248809000 50.2
#> # ... with 38 more rows
1
2
# side = "end"表示取每一个时间区间的最后一个值
# 这里是取每月的最后一个值
1
2
3
# 将日数据转化为季数据
FB_tbl_time %>%
as_period(period = "quarterly", side = "start")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#> # A time tibble: 16 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-04-01 25.6 25.9 25.3 25.5 22249300 25.5
#> 3 FB 2013-07-01 25.0 25.1 24.6 24.8 20582200 24.8
#> 4 FB 2013-10-01 50.0 51.0 49.5 50.4 98114000 50.4
#> 5 FB 2014-01-02 54.8 55.2 54.2 54.7 43195500 54.7
#> 6 FB 2014-04-01 60.5 62.7 60.2 62.6 59291000 62.6
#> 7 FB 2014-07-01 67.6 68.4 67.4 68.1 33243000 68.1
#> 8 FB 2014-10-01 78.8 78.8 75.9 76.6 55090000 76.6
#> 9 FB 2015-01-02 78.6 78.9 77.7 78.4 18177500 78.4
#> 10 FB 2015-04-01 82.5 82.7 80.9 81.7 22058200 81.7
#> 11 FB 2015-07-01 86.8 87.9 86.5 86.9 25260000 86.9
#> 12 FB 2015-10-01 90.1 90.9 88.4 90.9 29283700 90.9
#> 13 FB 2016-01-04 102 102 99.8 102 37912400 102
#> 14 FB 2016-04-01 114 116 113 116 24760500 116
#> 15 FB 2016-07-01 114 115 114 114 14980000 114
#> 16 FB 2016-10-03 128 129 128 129 13156900 129
1
2
# side = "start"表示取每一个时间区间的第一个值
# 这里是取每季度的第一个值
1
2
3
# 将日数据转化为年数据
FB_tbl_time %>%
as_period(period = "yearly", side = "start")
1
2
3
4
5
6
7
8
#> # A time tibble: 4 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2014-01-02 54.8 55.2 54.2 54.7 43195500 54.7
#> 3 FB 2015-01-02 78.6 78.9 77.7 78.4 18177500 78.4
#> 4 FB 2016-01-04 102 102 99.8 102 37912400 102
1
2
# side = "start"表示取每一个时间区间的第一个值
# 这里是取每年的第一个值
1
2
3
# 将日数据转化为每4天一次
FB_tbl_time %>%
as_period(period = "5 days", start_date = as.Date("2013-01-01"), side = "end")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 292 x 8
#> # Index: date
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 2 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 3 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> 4 FB 2013-01-18 30.3 30.4 29.3 29.7 49631500 29.7
#> 5 FB 2013-01-25 31.4 31.9 31.1 31.5 54363600 31.5
#> 6 FB 2013-01-30 31.0 31.5 30.9 31.2 87682100 31.2
#> 7 FB 2013-02-04 29.1 29.2 28.0 28.1 92362200 28.1
#> 8 FB 2013-02-08 28.9 29.2 28.5 28.5 37708800 28.5
#> 9 FB 2013-02-14 28.0 28.6 28.0 28.5 35615800 28.5
#> 10 FB 2013-02-19 28.2 29.1 28.1 28.9 49396400 28.9
#> # ... with 282 more rows
1
2
# 注:这里的`start_date`必须小于或者等于时间索引列的最小值
# 这里原始索引列的最小值为`2013-01-02`,这里`start_date`设置为`2013-01-01`,做了更改。
1
2
3
4
5
6
# 操作分组数据
data("FANG")
FANG %>%
group_by(symbol) %>%
as_tbl_time(index = date) %>%
as_period(period = "2 years", side = "start")
1
2
3
4
5
6
7
8
9
10
11
12
13
#> # A time tibble: 8 x 8
#> # Index: date
#> # Groups: symbol [4]
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2015-01-02 78.6 78.9 77.7 78.4 18177500 78.4
#> 3 AMZN 2013-01-02 256 258 253 257 3271000 257
#> 4 AMZN 2015-01-02 313 315 307 309 2783200 309
#> 5 NFLX 2013-01-02 95.2 95.8 90.7 92.0 19431300 13.1
#> 6 NFLX 2015-01-02 344 352 341 349 13475000 49.8
#> 7 GOOG 2013-01-02 719 727 717 723 5101500 361
#> 8 GOOG 2015-01-02 529 531 524 525 1447500 525

滚动计算

rollify()函数

1
rollify
1
2
3
4
5
6
7
8
9
#> function (.f, window = 1, unlist = TRUE, na_value = NULL)
#> {
#> .f <- purrr::as_mapper(.f)
#> function(...) {
#> roller(..., .f = .f, window = window, unlist = unlist,
#> na_value = na_value)
#> }
#> }
#> <environment: namespace:tibbletime>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# 计算5期的移动平均
mean_5 <- rollify(mean, window = 5) # `window`表示滚动窗口
mean_5
#> function (...)
#> {
#> roller(..., .f = .f, window = window, unlist = unlist, na_value = na_value)
#> }
#> <environment: 0x000000001903d058>
FB_tbl_time %>%
mutate(roll_mean = mean_5(adjusted))
#> # A time tibble: 1,008 x 9
#> # Index: date
#> symbol date open high low close volume adjusted roll_mean
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0 NA
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 NA
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 NA
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 NA
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 28.6
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 29.1
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 29.8
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 30.4
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 30.7
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 30.9
#> # ... with 998 more rows
FB_tbl_time %>%
pull(adjusted) %>%
.[1:5] %>%
mean(., na.rm = TRUE) # 正是`roll_mean`这一列的计算方法
#> [1] 28.602
1
2
3
4
5
6
7
8
9
10
# 计算2,3,4期的移动平均
rolling_mean_2 <- rollify(mean, window = 2)
rolling_mean_3 <- rollify(mean, window = 3)
rolling_mean_4 <- rollify(mean, window = 4)
FB %>% mutate(
rm10 = rolling_mean_2(adjusted),
rm20 = rolling_mean_3(adjusted),
rm30 = rolling_mean_4(adjusted)
)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#> # A tibble: 1,008 x 11
#> symb~ date open high low close volume adju~ rm10 rm20 rm30
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 6.98e7 28.0 NA NA NA
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 6.31e7 27.8 27.9 NA NA
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 7.27e7 28.8 28.3 28.2 NA
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 8.38e7 29.4 29.1 28.6 28.5
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 4.59e7 29.1 29.2 29.1 28.8
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 1.05e8 30.6 29.8 29.7 29.5
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 9.53e7 31.3 30.9 30.3 30.1
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 8.96e7 31.7 31.5 31.2 30.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 9.89e7 31.0 31.3 31.3 31.1
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 1.73e8 30.1 30.5 30.9 31.0
#> # ... with 998 more rows
1
2
3
4
# 开盘价和收盘价均价的5期移动平均值
rolling_avg_sum <- rollify(~ mean(.x + .y), window = 5)
mutate(FB_tbl_time, avg_sum = rolling_avg_sum(open, close))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 1,008 x 9
#> # Index: date
#> symbol date open high low close volume adjusted avg_sum
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0 NA
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 NA
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 NA
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 NA
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 56.9
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 57.9
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 59.1
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 60.4
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 61.4
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 61.8
#> # ... with 998 more rows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# 实现list-column
summary_df <- function(x) {
data.frame(
rolled_summary_type = c("mean", "sd", "min", "max", "median"),
rolled_summary_val = c(mean(x), sd(x), min(x), max(x), median(x))
)
}
# A rolling version, with unlist = FALSE
rolling_summary <- rollify(~ summary_df(.x), window = 5,
unlist = FALSE)
FB_summarised <- mutate(FB_tbl_time, summary_list_col = rolling_summary(adjusted))
FB_summarised
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 1,008 x 9
#> # Index: date
#> symbol date open high low close volume adjusted summary_l~
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <list>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0 <lgl [1]>
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 <lgl [1]>
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 <lgl [1]>
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 <lgl [1]>
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 <data.fra~
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 <data.fra~
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 <data.fra~
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 <data.fra~
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 <data.fra~
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 <data.fra~
#> # ... with 998 more rows
1
2
3
FB_summarised %>%
filter(!is.na(summary_list_col)) %>%
tidyr::unnest()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 5,020 x 10
#> # Index: date
#> symbol date open high low close volume adjus~ roll~ rolle~
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <dbl>
#> 1 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 mean 28.6
#> 2 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 sd 0.700
#> 3 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 min 27.8
#> 4 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 max 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 medi~ 28.8
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 mean 29.1
#> 7 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 sd 1.03
#> 8 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 min 27.8
#> 9 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 max 30.6
#> 10 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 medi~ 29.1
#> # ... with 5,010 more rows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# 计算移动平均后缺失值的处理问题
rolling_summary <- rollify(~summary_df(.x), window = 5,
unlist = FALSE, na_value = data.frame())
FB_summarised <- mutate(FB, summary_list_col = rolling_summary(adjusted))
FB_summarised
#> # A tibble: 1,008 x 9
#> symbol date open high low close volume adjusted summary_l~
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <list>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0 <data.fra~
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 <data.fra~
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 <data.fra~
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 <data.fra~
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 <data.fra~
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 <data.fra~
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 <data.fra~
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 <data.fra~
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 <data.fra~
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 <data.fra~
#> # ... with 998 more rows
FB_summarised %>%
tidyr::unnest()
#> # A tibble: 5,020 x 10
#> symbol date open high low close volume adjus~ roll~ rolle~
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <dbl>
#> 1 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 mean 28.6
#> 2 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 sd 0.700
#> 3 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 min 27.8
#> 4 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 max 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 medi~ 28.8
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 mean 29.1
#> 7 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 sd 1.03
#> 8 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 min 27.8
#> 9 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 max 30.6
#> 10 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 medi~ 29.1
#> # ... with 5,010 more rows

创建tbl_time对象

create_series()函数

1
2
# 创建一整年的数据,每2天为一个间隔
create_series(~'2013', period = '2 day')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 183 x 1
#> # Index: date
#> date
#> <dttm>
#> 1 2013-01-01 00:00:00
#> 2 2013-01-03 00:00:00
#> 3 2013-01-05 00:00:00
#> 4 2013-01-07 00:00:00
#> 5 2013-01-09 00:00:00
#> 6 2013-01-11 00:00:00
#> 7 2013-01-13 00:00:00
#> 8 2013-01-15 00:00:00
#> 9 2013-01-17 00:00:00
#> 10 2013-01-19 00:00:00
#> # ... with 173 more rows
1
2
# 2013年的日度数据
create_series(~'2013', period = 'daily')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 365 x 1
#> # Index: date
#> date
#> <dttm>
#> 1 2013-01-01 00:00:00
#> 2 2013-01-02 00:00:00
#> 3 2013-01-03 00:00:00
#> 4 2013-01-04 00:00:00
#> 5 2013-01-05 00:00:00
#> 6 2013-01-06 00:00:00
#> 7 2013-01-07 00:00:00
#> 8 2013-01-08 00:00:00
#> 9 2013-01-09 00:00:00
#> 10 2013-01-10 00:00:00
#> # ... with 355 more rows
1
2
# 2013年的季度数据
create_series(~'2013', period = '1 quarter')
1
2
3
4
5
6
7
8
#> # A time tibble: 4 x 1
#> # Index: date
#> date
#> <dttm>
#> 1 2013-01-01 00:00:00
#> 2 2013-04-01 00:00:00
#> 3 2013-07-01 00:00:00
#> 4 2013-10-01 00:00:00
1
2
# 2013年至2015年的季度数据
create_series('2013' ~ '2015', period = '1 day')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 1,095 x 1
#> # Index: date
#> date
#> <dttm>
#> 1 2013-01-01 00:00:00
#> 2 2013-01-02 00:00:00
#> 3 2013-01-03 00:00:00
#> 4 2013-01-04 00:00:00
#> 5 2013-01-05 00:00:00
#> 6 2013-01-06 00:00:00
#> 7 2013-01-07 00:00:00
#> 8 2013-01-08 00:00:00
#> 9 2013-01-09 00:00:00
#> 10 2013-01-10 00:00:00
#> # ... with 1,085 more rows
1
2
# 2012年1月至2012年2月每分钟的高频数据
create_series('2012-01' ~ '2012-02', period = '1 minute')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 86,400 x 1
#> # Index: date
#> date
#> <dttm>
#> 1 2012-01-01 00:00:00
#> 2 2012-01-01 00:01:00
#> 3 2012-01-01 00:02:00
#> 4 2012-01-01 00:03:00
#> 5 2012-01-01 00:04:00
#> 6 2012-01-01 00:05:00
#> 7 2012-01-01 00:06:00
#> 8 2012-01-01 00:07:00
#> 9 2012-01-01 00:08:00
#> 10 2012-01-01 00:09:00
#> # ... with 86,390 more rows
1
2
# 2011年1月1日每秒钟的高频数据
create_series('2011-01-01 12:10:00' ~ '2011-01-01 12:12:00', period = '1 second')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 121 x 1
#> # Index: date
#> date
#> <dttm>
#> 1 2011-01-01 12:10:00
#> 2 2011-01-01 12:10:01
#> 3 2011-01-01 12:10:02
#> 4 2011-01-01 12:10:03
#> 5 2011-01-01 12:10:04
#> 6 2011-01-01 12:10:05
#> 7 2011-01-01 12:10:06
#> 8 2011-01-01 12:10:07
#> 9 2011-01-01 12:10:08
#> 10 2011-01-01 12:10:09
#> # ... with 111 more rows
1
2
# 创建类型为`Date`的序列
create_series(~'2013', period = '1 day', class = "Date")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 365 x 1
#> # Index: date
#> date
#> <date>
#> 1 2013-01-01
#> 2 2013-01-02
#> 3 2013-01-03
#> 4 2013-01-04
#> 5 2013-01-05
#> 6 2013-01-06
#> 7 2013-01-07
#> 8 2013-01-08
#> 9 2013-01-09
#> 10 2013-01-10
#> # ... with 355 more rows
1
2
# 创建类型为`yearmon`的序列
create_series(~'2013', period = '1 month', class = "yearmon")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#> # A time tibble: 12 x 1
#> # Index: date
#> date
#> <S3: yearmon>
#> 1 1月 2013
#> 2 2月 2013
#> 3 3月 2013
#> 4 4月 2013
#> 5 5月 2013
#> 6 6月 2013
#> 7 7月 2013
#> 8 8月 2013
#> 9 9月 2013
#> 10 10月 2013
#> 11 11月 2013
#> 12 12月 2013
1
2
3
# 创建类型为`hms`的序列
# `time_formula`的格式为`HH:MM:SS`
create_series(time_formula = '00:00:00' ~ '12:00:00', period = '1 second' , class = "hms")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 43,201 x 1
#> # Index: date
#> date
#> <time>
#> 1 00'00"
#> 2 00'01"
#> 3 00'02"
#> 4 00'03"
#> 5 00'04"
#> 6 00'05"
#> 7 00'06"
#> 8 00'07"
#> 9 00'08"
#> 10 00'09"
#> # ... with 43,191 more rows

分组操作

collapse_index()函数

1
collapse_index
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#> function (index, period = "yearly", start_date = NULL, side = "end",
#> ...)
#> {
#> index_part <- partition_index(index, period, start_date)
#> index_num <- to_posixct_numeric(index)
#> if (side == "start") {
#> pos <- match(unique(index_part), index_part)
#> index_at_pos <- index_num[pos]
#> reps <- diff(c(pos, length(index_part) + 1))
#> }
#> else if (side == "end") {
#> pos <- length(index_part) - match(unique(index_part),
#> rev(index_part)) + 1
#> index_at_pos <- index_num[pos]
#> reps <- diff(c(0, pos))
#> }
#> new_index <- posixct_numeric_to_datetime(x = rep(index_at_pos,
#> reps), class = get_index_col_class(index), tz = get_index_col_time_zone(index))
#> new_index
#> }
#> <environment: namespace:tibbletime>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# 计算每个月的均值
FB_tbl_time %>%
select(-symbol) %>%
mutate(date = collapse_index(index = date, period = "monthly", side = "end")) %>%
group_by(date) %>%
summarise_all(mean)
#> # A time tibble: 48 x 7
#> # Index: date
#> date open high low close volume adjusted
#> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2013-01-31 30.2 30.8 29.8 30.3 79802462 30.3
#> 2 2013-02-28 28.3 28.6 27.7 28.1 50402095 28.1
#> 3 2013-03-28 26.9 27.2 26.5 26.8 36359025 26.8
#> 4 2013-04-30 26.6 27.0 26.2 26.6 33568600 26.6
#> 5 2013-05-31 26.4 26.6 25.9 26.1 44640673 26.1
#> 6 2013-06-28 24.0 24.3 23.7 23.9 39416575 23.9
#> 7 2013-07-31 27.7 28.2 27.4 27.9 65364414 27.9
#> 8 2013-08-30 38.7 39.3 38.2 38.7 61136095 38.7
#> 9 2013-09-30 45.5 46.3 44.9 45.8 79154190 45.8
#> 10 2013-10-31 50.7 51.5 49.7 50.5 88375435 50.5
#> # ... with 38 more rows
# 等同于
FB_tbl_time %>%
select(-symbol) %>%
mutate(date = collapse_index(index = date, period = "1 month", side = "end")) %>%
group_by(date) %>%
summarise_all(mean)
#> # A time tibble: 48 x 7
#> # Index: date
#> date open high low close volume adjusted
#> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2013-01-31 30.2 30.8 29.8 30.3 79802462 30.3
#> 2 2013-02-28 28.3 28.6 27.7 28.1 50402095 28.1
#> 3 2013-03-28 26.9 27.2 26.5 26.8 36359025 26.8
#> 4 2013-04-30 26.6 27.0 26.2 26.6 33568600 26.6
#> 5 2013-05-31 26.4 26.6 25.9 26.1 44640673 26.1
#> 6 2013-06-28 24.0 24.3 23.7 23.9 39416575 23.9
#> 7 2013-07-31 27.7 28.2 27.4 27.9 65364414 27.9
#> 8 2013-08-30 38.7 39.3 38.2 38.7 61136095 38.7
#> 9 2013-09-30 45.5 46.3 44.9 45.8 79154190 45.8
#> 10 2013-10-31 50.7 51.5 49.7 50.5 88375435 50.5
#> # ... with 38 more rows

来看下collpase_index()函数究竟做了什么:

1
2
3
4
FB_tbl_time %>%
select(-symbol) %>%
# 这里side = "start"/"end",前者是该时间区间的第一个数,后者是最后一个数
mutate(date = collapse_index(index = date, period = "monthly", side = "end"))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#> # A time tibble: 1,008 x 7
#> # Index: date
#> date open high low close volume adjusted
#> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2013-01-31 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 2013-01-31 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 2013-01-31 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 2013-01-31 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 2013-01-31 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 2013-01-31 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 2013-01-31 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 2013-01-31 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 2013-01-31 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 2013-01-31 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 998 more rows

很明显,collpase_index()函数创建了一个同FB_tbl_time行数相同的时间列,然后dplyr中的group_by()函数根据
这一列进行分组,最后通过聚合函数mean()计算每个月的均值。

再看一个例子

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Facebook, Amazon, Netflix and Google stocks
data(FANG)
FANG
#> # A tibble: 4,032 x 8
#> symbol date open high low close volume adjusted
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1
#> # ... with 4,022 more rows
# 根据时间索引列和股票代码分组
FANG %>%
as_tbl_time(date) %>%
group_by(symbol) %>%
# 日度数据转化为年度数据
mutate(date = collapse_index(date, period = "yearly")) %>%
# Additionally group by date (yearly)
group_by(date, add = TRUE) %>%
# Perform a yearly summary for each symbol
summarise(
adj_min = min(adjusted),
adj_max = max(adjusted),
adj_range = adj_max - adj_min
)
#> # A time tibble: 16 x 5
#> # Index: date
#> # Groups: symbol [?]
#> symbol date adj_min adj_max adj_range
#> <chr> <date> <dbl> <dbl> <dbl>
#> 1 AMZN 2013-12-31 248 404 156
#> 2 AMZN 2014-12-31 287 407 120
#> 3 AMZN 2015-12-31 287 694 407
#> 4 AMZN 2016-12-30 482 844 362
#> 5 FB 2013-12-31 22.9 58.0 35.1
#> 6 FB 2014-12-31 53.5 81.4 27.9
#> 7 FB 2015-12-31 74.1 109 35.0
#> 8 FB 2016-12-30 94.2 133 39.1
#> 9 GOOG 2013-12-31 351 560 209
#> 10 GOOG 2014-12-31 495 609 114
#> 11 GOOG 2015-12-31 493 777 284
#> 12 GOOG 2016-12-30 668 813 145
#> 13 NFLX 2013-12-31 13.1 54.4 41.2
#> 14 NFLX 2014-12-31 44.9 69.2 24.3
#> 15 NFLX 2015-12-31 45.5 131 85.4
#> 16 NFLX 2016-12-30 82.8 128 45.6

滚动回归

1
2
3
4
5
6
7
8
9
10
data("FB")
rolling_lm <- rollify(.f = function(close, high, low, volume) {
lm(close ~ high + low + volume)
},
window = 5,
unlist = FALSE) # 将回归结果存储为一个`S3`对象
FB_reg <- mutate(FB, roll_lm = rolling_lm(close, high, low, volume))
FB_reg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#> # A tibble: 1,008 x 9
#> symbol date open high low close volume adjusted roll_lm
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <list>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28.0 69846400 28.0 <lgl [1]>
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 <lgl [1]>
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 <lgl [1]>
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 <lgl [1]>
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 <S3: lm>
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 <S3: lm>
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 <S3: lm>
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 <S3: lm>
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 <S3: lm>
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 <S3: lm>
#> # ... with 998 more rows

参考