tibbletime包：时序数据处理的倚天剑

tibbletime是Business Science团队推出的一款与tidyverse框架完美契合的时序数据包。tibbletime提供了一种新的时序数据类型——tbl_time，其继承了R中数据框的data.frame类以及tibble包中的tbl_df类，与dplyr这一数据处理利器完美契合。同时，tibbletime还提供了一系列强大的函数用以处理tbl_time类型的数据。
tibbletime包的作者这样介绍其优势：

filter_time()函数：简洁的切片操作
collapse_index()函数：根据时间（例如年、月、每两周等）对索引列进行分割，进而利用dplyr包的分组函数计算汇总数据
as_period()函数：改变一个time tibble的时间间隔，让日数据变为周数据/月数据更为简单
rolliyf()函数：滚动分析
create_series()函数：快捷地创建tbl_time类型的时序数据
索引列完美支持Date和POSIXct类型，初步支持yearmon，yearqtr和hms类型。

filter_time
#> function (.tbl_time, time_formula) 
#> {
#>     UseMethod("filter_time")
#> }
#> <environment: namespace:tibbletime>
collapse_index
#> function (index, period = "yearly", start_date = NULL, side = "end", 
#>     ...) 
#> {
#>     index_part <- partition_index(index, period, start_date)
#>     index_num <- to_posixct_numeric(index)
#>     if (side == "start") {
#>         pos <- match(unique(index_part), index_part)
#>         index_at_pos <- index_num[pos]
#>         reps <- diff(c(pos, length(index_part) + 1))
#>     }
#>     else if (side == "end") {
#>         pos <- length(index_part) - match(unique(index_part), 
#>             rev(index_part)) + 1
#>         index_at_pos <- index_num[pos]
#>         reps <- diff(c(0, pos))
#>     }
#>     new_index <- posixct_numeric_to_datetime(x = rep(index_at_pos, 
#>         reps), class = get_index_col_class(index), tz = get_index_col_time_zone(index))
#>     new_index
#> }
#> <environment: namespace:tibbletime>
as_period
#> function (.tbl_time, period = "yearly", start_date = NULL, side = "start", 
#>     include_endpoints = FALSE, ...) 
#> {
#>     UseMethod("as_period")
#> }
#> <environment: namespace:tibbletime>
rollify
#> function (.f, window = 1, unlist = TRUE, na_value = NULL) 
#> {
#>     .f <- purrr::as_mapper(.f)
#>     function(...) {
#>         roller(..., .f = .f, window = window, unlist = unlist, 
#>             na_value = na_value)
#>     }
#> }
#> <environment: namespace:tibbletime>
create_series
#> function (time_formula, period = "daily", class = "POSIXct", 
#>     include_end = FALSE, tz = "UTC", as_vector = FALSE) 
#> {
#>     period_list <- parse_period(period)
#>     dummy_index <- make_dummy_dispatch_obj(class)
#>     assert_allowed_datetime(dummy_index)
#>     assert_period_matches_index_class(dummy_index, period_list$period)
#>     seq_fun <- lookup_seq_fun(dummy_index)
#>     tf_list <- parse_time_formula(dummy_index, time_formula)
#>     from_to <- purrr::map(tf_list, ~list_to_datetime(dummy_index, 
#>         .x, tz = tz))
#>     from <- from_to[[1]]
#>     to <- from_to[[2]]
#>     by <- paste(period_list$freq, period_list$period)
#>     assert_from_before_to(from, to)
#>     date_seq <- seq_fun(from, to, by = by)
#>     if (include_end) {
#>         if (max(date_seq) < to) {
#>             date_seq <- push_datetime(date_seq, to)
#>         }
#>     }
#>     if (as_vector) {
#>         date_seq
#>     }
#>     else {
#>         as_tbl_time(tibble::tibble(date = date_seq), date)
#>     }
#> }
#> <environment: namespace:tibbletime>

下面将通过实例来展示tibbletime的强大。

## 安装
# 正式版
install.packages("tibbletime")
# 开发板
devtools::install_github("business-science/tibbletime")

`tibbletime`操作

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tibbletime)

# ls(getNamespace("tibbletime"))
ls("package:tibbletime")
#>  [1] "%>%"                 "as_period"           "as_tbl_time"        
#>  [4] "ceiling_index"       "collapse_index"      "create_series"      
#>  [7] "filter"              "filter_time"         "floor_index"        
#> [10] "get_index_char"      "get_index_class"     "get_index_col"      
#> [13] "get_index_quo"       "get_index_time_zone" "parse_period"       
#> [16] "partition_index"     "reconstruct"         "rollify"            
#> [19] "tbl_time"

将常规的数据框转化为`tbl_time`

# Facebook stock prices. Comes with the package
data("FB")
FB
#> # A tibble: 1,008 x 8
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 998 more rows
class(FB)
#> [1] "tbl_df"     "tbl"        "data.frame"

FB_tbl_time <- FB %>% 
  as_tbl_time(index = date)
FB_tbl_time
#> # A time tibble: 1,008 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 998 more rows
class(FB_tbl_time)
#> [1] "tbl_time"   "tbl_df"     "tbl"        "data.frame"

my_df <- data.frame(x1 = Sys.time() + 1:20,
                    x2 = letters[1:20],
                    x3 = round(rnorm(20, mean = 3), 4))
head(my_df)
#>                    x1 x2     x3
#> 1 2018-01-06 10:28:58  a 3.4440
#> 2 2018-01-06 10:28:59  b 4.3121
#> 3 2018-01-06 10:29:00  c 2.5993
#> 4 2018-01-06 10:29:01  d 3.0153
#> 5 2018-01-06 10:29:02  e 3.1968
#> 6 2018-01-06 10:29:03  f 1.5981
my_df %>% 
  as_tbl_time(index = x1) %>% 
  head(n = 4)
#> # A time tibble: 4 x 3
#> # Index: x1
#>   x1                  x2        x3
#>   <dttm>              <fctr> <dbl>
#> 1 2018-01-06 10:28:58 a       3.44
#> 2 2018-01-06 10:28:59 b       4.31
#> 3 2018-01-06 10:29:00 c       2.60
#> 4 2018-01-06 10:29:01 d       3.02

对`tbl_time`对象切片

filter_time()函数

1	filter_time

#> function (.tbl_time, time_formula) 
#> {
#>     UseMethod("filter_time")
#> }
#> <environment: namespace:tibbletime>

1
2
3

# 提取2013年3月至2015年的数据
FB_tbl_time %>% 
  filter_time(time_formula = '2013-03' ~ '2015')

#> # A time tibble: 716 x 8
#> # Index: date
#>    symbol date        open  high   low close   volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#>  1 FB     2013-03-01  27.0  28.1  26.8  27.8 54064800     27.8
#>  2 FB     2013-03-04  27.8  28.1  27.4  27.7 32400700     27.7
#>  3 FB     2013-03-05  27.9  28.2  27.2  27.5 40622200     27.5
#>  4 FB     2013-03-06  28.1  28.1  27.4  27.5 33532600     27.5
#>  5 FB     2013-03-07  27.6  28.7  27.5  28.6 74540200     28.6
#>  6 FB     2013-03-08  28.4  28.5  27.7  28.0 44198900     28.0
#>  7 FB     2013-03-11  28.0  28.6  27.8  28.1 35642100     28.1
#>  8 FB     2013-03-12  28.1  28.3  27.6  27.8 27569600     27.8
#>  9 FB     2013-03-13  27.6  27.6  26.9  27.1 39619500     27.1
#> 10 FB     2013-03-14  27.1  27.4  26.8  27.0 27646400     27.0
#> # ... with 706 more rows

1
2
3

# 提取2013年1月2日至2013年12月31日的数据
FB_tbl_time %>% 
  filter_time(time_formula = "2013-01-02" ~ "2013-12-31")

#> # A time tibble: 252 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 242 more rows

1	# 注：这里`~`两边的时间/日期都包含在内

利用dplyr独立完成这一操作的代码如下：

1 2	FB_tbl_time %>% filter(date >= as.Date("2013-01-02"), date <= as.Date("2013-12-31"))

#> # A time tibble: 252 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 242 more rows

可以看到，代码量增加了很多。

1
2
3

# 提取2013年1月至2014年2月的数据
FB_tbl_time %>% 
  filter_time("2013-01" ~ "2014-02")

#> # A time tibble: 292 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 282 more rows

当然，利用tibbletime代码还可以进一步的缩减。

1
2
3

# 提取2013年一整年的数据
FB_tbl_time %>% 
  filter_time(time_formula = "2013" ~ "2013")

#> # A time tibble: 252 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 242 more rows

还长了，再缩减一下

1
2
3

# 提取2013年一整年的数据
FB_tbl_time %>% 
  filter_time(time_formula = ~ "2013")

#> # A time tibble: 252 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 242 more rows

1
2
3

# 提取2015年3月份的数据
FB_tbl_time %>% 
  filter_time(time_formula = ~ "2015-03")

#> # A time tibble: 22 x 8
#> # Index: date
#>    symbol date        open  high   low close   volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#>  1 FB     2015-03-02  79.0  79.9  78.5  79.8 21662500     79.8
#>  2 FB     2015-03-03  79.6  79.7  78.5  79.6 18635000     79.6
#>  3 FB     2015-03-04  79.3  81.2  78.8  80.9 28126700     80.9
#>  4 FB     2015-03-05  81.2  82.0  81.1  81.2 27825700     81.2
#>  5 FB     2015-03-06  80.9  81.3  79.8  80.0 24488600     80.0
#>  6 FB     2015-03-09  79.7  79.9  78.6  79.4 18925100     79.4
#>  7 FB     2015-03-10  78.5  79.3  77.6  77.6 23067100     77.6
#>  8 FB     2015-03-11  77.8  78.4  77.3  77.6 20215700     77.6
#>  9 FB     2015-03-12  78.1  79.1  77.9  78.9 16093300     78.9
#> 10 FB     2015-03-13  78.6  79.4  77.7  78.1 18557300     78.1
#> # ... with 12 more rows

两个关键词参数

start
end

1
2
3

# 提取从序列开始到2015年的所有数据，包含2015年
FB_tbl_time %>% 
  filter_time(time_formula = "start" ~ "2015")

#> # A time tibble: 756 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 746 more rows

1
2
3

# 提取从2015年到序列结束的所有数据，包含2015年
FB_tbl_time %>% 
  filter_time(time_formula = "2015" ~ "end")

#> # A time tibble: 504 x 8
#> # Index: date
#>    symbol date        open  high   low close   volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#>  1 FB     2015-01-02  78.6  78.9  77.7  78.4 18177500     78.4
#>  2 FB     2015-01-05  78.0  79.2  76.9  77.2 26452200     77.2
#>  3 FB     2015-01-06  77.2  77.6  75.4  76.2 27399300     76.2
#>  4 FB     2015-01-07  76.8  77.4  75.8  76.2 22045300     76.2
#>  5 FB     2015-01-08  76.7  78.2  76.1  78.2 23961000     78.2
#>  6 FB     2015-01-09  78.2  78.6  77.2  77.7 21157000     77.7
#>  7 FB     2015-01-12  77.8  78.0  76.2  76.7 19190200     76.7
#>  8 FB     2015-01-13  77.2  78.1  75.8  76.4 25179600     76.4
#>  9 FB     2015-01-14  76.4  77.2  76.0  76.3 25741000     76.3
#> 10 FB     2015-01-15  76.4  76.6  73.5  74.1 34134000     74.1
#> # ... with 494 more rows

对分组后的`tbl_time`对象进行取子集操作

# Facebook, Amazon, Netify, Google
data("FANG")
FANG
#> # A tibble: 4,032 x 8
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 4,022 more rows
FANG %>% 
  as_tbl_time(index = date) %>% 
  group_by(symbol) %>% 
  filter_time(time_formula = '2013-01-01' ~ '2013-01-04')
#> # A time tibble: 12 x 8
#> # Index:  date
#> # Groups: symbol [4]
#>    symbol date        open  high   low close   volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0 69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8 63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8 72715400     28.8
#>  4 AMZN   2013-01-02 256   258   253   257    3271000    257  
#>  5 AMZN   2013-01-03 257   261   256   258    2750900    258  
#>  6 AMZN   2013-01-04 258   260   257   259    1874200    259  
#>  7 NFLX   2013-01-02  95.2  95.8  90.7  92.0 19431300     13.1
#>  8 NFLX   2013-01-03  92.0  97.9  91.5  96.6 27912500     13.8
#>  9 NFLX   2013-01-04  96.5  97.7  95.5  96.0 17761100     13.7
#> 10 GOOG   2013-01-02 719   727   717   723    5101500    361  
#> 11 GOOG   2013-01-03 725   732   721   724    4653700    361  
#> 12 GOOG   2013-01-04 729   741   728   738    5547600    369

根据分钟/秒来提取子集

my_df <- tibble::tibble(
  date = lubridate::as_datetime(c("2018-01-05 20:54:16",
                                  "2018-01-05 20:54:18",
                                  "2018-01-05 20:55:54",
                                  "2018-01-05 20:54:24",
                                  "2018-01-05 20:55:44",
                                  "2018-01-05 20:54:34")),  
  x2 = rnorm(6)
)
my_df
#> # A tibble: 6 x 2
#>   date                    x2
#>   <dttm>               <dbl>
#> 1 2018-01-05 20:54:16 -0.343
#> 2 2018-01-05 20:54:18 -0.194
#> 3 2018-01-05 20:55:54  0.402
#> 4 2018-01-05 20:54:24 -0.753
#> 5 2018-01-05 20:55:44 -0.294
#> 6 2018-01-05 20:54:34 -1.97
my_df %>% 
  arrange(date) %>% 
  as_tbl_time(index = date) %>% 
  filter_time(time_formula = "2018-01-05 + 20:54:16" ~ "2018-01-05 + 20:54:44")
#> # A time tibble: 4 x 2
#> # Index: date
#>   date                    x2
#>   <dttm>               <dbl>
#> 1 2018-01-05 20:54:16 -0.343
#> 2 2018-01-05 20:54:18 -0.194
#> 3 2018-01-05 20:54:24 -0.753
#> 4 2018-01-05 20:54:34 -1.97

除了使用filter_time()函数进行取子集操作外，tibbletime还支持常规的[操作。

1	FB_tbl_time[~ "2013"]

#> # A time tibble: 252 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 242 more rows

1	FB_tbl_time["2013" ~ "2014-02", c(1, 2, 3)]

#> # A time tibble: 292 x 3
#> # Index: date
#>    symbol date        open
#>    <chr>  <date>     <dbl>
#>  1 FB     2013-01-02  27.4
#>  2 FB     2013-01-03  27.9
#>  3 FB     2013-01-04  28.0
#>  4 FB     2013-01-07  28.7
#>  5 FB     2013-01-08  29.5
#>  6 FB     2013-01-09  29.7
#>  7 FB     2013-01-10  30.6
#>  8 FB     2013-01-11  31.3
#>  9 FB     2013-01-14  32.1
#> 10 FB     2013-01-15  30.6
#> # ... with 282 more rows

利用`filter_time()`函数进行编程

1 2	date_var <- as.Date("2014-01-01") filter_time(FB_tbl_time, 'start' ~ date_var)

#> # A time tibble: 252 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 242 more rows

对`tbl_time`对象进行高频转低频

as_period()函数

as_period

#> function (.tbl_time, period = "yearly", start_date = NULL, side = "start", 
#>     include_endpoints = FALSE, ...) 
#> {
#>     UseMethod("as_period")
#> }
#> <environment: namespace:tibbletime>

1
2
3

# 将日数据转化为周数据
FB_tbl_time %>% 
  as_period(period = "weekly", side = "start")

#> # A time tibble: 209 x 8
#> # Index: date
#>    symbol date        open  high   low close   volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0 69846400     28.0
#>  2 FB     2013-01-07  28.7  29.8  28.6  29.4 83781800     29.4
#>  3 FB     2013-01-14  32.1  32.2  30.6  31.0 98892800     31.0
#>  4 FB     2013-01-22  29.8  30.9  29.7  30.7 55243300     30.7
#>  5 FB     2013-01-28  31.9  32.5  31.8  32.5 59682500     32.5
#>  6 FB     2013-02-04  29.1  29.2  28.0  28.1 92362200     28.1
#>  7 FB     2013-02-11  28.6  28.7  28.0  28.3 37361800     28.3
#>  8 FB     2013-02-19  28.2  29.1  28.1  28.9 49396400     28.9
#>  9 FB     2013-02-25  27.2  27.6  27.2  27.3 34652000     27.3
#> 10 FB     2013-03-04  27.8  28.1  27.4  27.7 32400700     27.7
#> # ... with 199 more rows

1 2	# side = "start"表示取每一个时间区间的第一个值 # 这里是取每周的第一个值

1
2
3

# 将日数据转化为月数据
FB_tbl_time %>% 
  as_period("monthly", side = "end")

#> # A time tibble: 48 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-31  29.2  31.5  28.7  31.0 190744900     31.0
#>  2 FB     2013-02-28  26.8  27.3  26.3  27.2  83027800     27.2
#>  3 FB     2013-03-28  26.1  26.2  25.5  25.6  28585700     25.6
#>  4 FB     2013-04-30  27.1  27.8  27.0  27.8  36245700     27.8
#>  5 FB     2013-05-31  24.6  25.0  24.3  24.4  35925000     24.4
#>  6 FB     2013-06-28  24.7  25.0  24.4  24.9  96778900     24.9
#>  7 FB     2013-07-31  38.0  38.3  36.3  36.8 154828700     36.8
#>  8 FB     2013-08-30  42.0  42.3  41.1  41.3  67735100     41.3
#>  9 FB     2013-09-30  50.1  51.6  49.8  50.2 100095000     50.2
#> 10 FB     2013-10-31  47.2  52.0  46.5  50.2 248809000     50.2
#> # ... with 38 more rows

1 2	# side = "end"表示取每一个时间区间的最后一个值 # 这里是取每月的最后一个值

1
2
3

# 将日数据转化为季数据
FB_tbl_time %>% 
  as_period(period = "quarterly", side = "start")

#> # A time tibble: 16 x 8
#> # Index: date
#>    symbol date        open  high   low close   volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0 69846400     28.0
#>  2 FB     2013-04-01  25.6  25.9  25.3  25.5 22249300     25.5
#>  3 FB     2013-07-01  25.0  25.1  24.6  24.8 20582200     24.8
#>  4 FB     2013-10-01  50.0  51.0  49.5  50.4 98114000     50.4
#>  5 FB     2014-01-02  54.8  55.2  54.2  54.7 43195500     54.7
#>  6 FB     2014-04-01  60.5  62.7  60.2  62.6 59291000     62.6
#>  7 FB     2014-07-01  67.6  68.4  67.4  68.1 33243000     68.1
#>  8 FB     2014-10-01  78.8  78.8  75.9  76.6 55090000     76.6
#>  9 FB     2015-01-02  78.6  78.9  77.7  78.4 18177500     78.4
#> 10 FB     2015-04-01  82.5  82.7  80.9  81.7 22058200     81.7
#> 11 FB     2015-07-01  86.8  87.9  86.5  86.9 25260000     86.9
#> 12 FB     2015-10-01  90.1  90.9  88.4  90.9 29283700     90.9
#> 13 FB     2016-01-04 102   102    99.8 102   37912400    102  
#> 14 FB     2016-04-01 114   116   113   116   24760500    116  
#> 15 FB     2016-07-01 114   115   114   114   14980000    114  
#> 16 FB     2016-10-03 128   129   128   129   13156900    129

1 2	# side = "start"表示取每一个时间区间的第一个值 # 这里是取每季度的第一个值

1
2
3

# 将日数据转化为年数据
FB_tbl_time %>% 
  as_period(period = "yearly", side = "start")

#> # A time tibble: 4 x 8
#> # Index: date
#>   symbol date        open  high   low close   volume adjusted
#>   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#> 1 FB     2013-01-02  27.4  28.2  27.4  28.0 69846400     28.0
#> 2 FB     2014-01-02  54.8  55.2  54.2  54.7 43195500     54.7
#> 3 FB     2015-01-02  78.6  78.9  77.7  78.4 18177500     78.4
#> 4 FB     2016-01-04 102   102    99.8 102   37912400    102

1 2	# side = "start"表示取每一个时间区间的第一个值 # 这里是取每年的第一个值

1
2
3

# 将日数据转化为每4天一次
FB_tbl_time %>% 
  as_period(period = "5 days", start_date = as.Date("2013-01-01"), side = "end")

#> # A time tibble: 292 x 8
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  2 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  3 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#>  4 FB     2013-01-18  30.3  30.4  29.3  29.7  49631500     29.7
#>  5 FB     2013-01-25  31.4  31.9  31.1  31.5  54363600     31.5
#>  6 FB     2013-01-30  31.0  31.5  30.9  31.2  87682100     31.2
#>  7 FB     2013-02-04  29.1  29.2  28.0  28.1  92362200     28.1
#>  8 FB     2013-02-08  28.9  29.2  28.5  28.5  37708800     28.5
#>  9 FB     2013-02-14  28.0  28.6  28.0  28.5  35615800     28.5
#> 10 FB     2013-02-19  28.2  29.1  28.1  28.9  49396400     28.9
#> # ... with 282 more rows

1 2	# 注：这里的`start_date`必须小于或者等于时间索引列的最小值 # 这里原始索引列的最小值为`2013-01-02`，这里`start_date`设置为`2013-01-01`，做了更改。

# 操作分组数据
data("FANG")
FANG %>% 
  group_by(symbol) %>% 
  as_tbl_time(index = date) %>% 
  as_period(period = "2 years", side = "start")

#> # A time tibble: 8 x 8
#> # Index:  date
#> # Groups: symbol [4]
#>   symbol date        open  high   low close   volume adjusted
#>   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#> 1 FB     2013-01-02  27.4  28.2  27.4  28.0 69846400     28.0
#> 2 FB     2015-01-02  78.6  78.9  77.7  78.4 18177500     78.4
#> 3 AMZN   2013-01-02 256   258   253   257    3271000    257  
#> 4 AMZN   2015-01-02 313   315   307   309    2783200    309  
#> 5 NFLX   2013-01-02  95.2  95.8  90.7  92.0 19431300     13.1
#> 6 NFLX   2015-01-02 344   352   341   349   13475000     49.8
#> 7 GOOG   2013-01-02 719   727   717   723    5101500    361  
#> 8 GOOG   2015-01-02 529   531   524   525    1447500    525

滚动计算

rollify()函数

rollify

#> function (.f, window = 1, unlist = TRUE, na_value = NULL) 
#> {
#>     .f <- purrr::as_mapper(.f)
#>     function(...) {
#>         roller(..., .f = .f, window = window, unlist = unlist, 
#>             na_value = na_value)
#>     }
#> }
#> <environment: namespace:tibbletime>

# 计算5期的移动平均
mean_5 <- rollify(mean, window = 5) # `window`表示滚动窗口
mean_5
#> function (...) 
#> {
#>     roller(..., .f = .f, window = window, unlist = unlist, na_value = na_value)
#> }
#> <environment: 0x000000001903d058>
FB_tbl_time %>% 
  mutate(roll_mean = mean_5(adjusted))
#> # A time tibble: 1,008 x 9
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted roll_mean
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>     <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0      NA  
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8      NA  
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8      NA  
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4      NA  
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1      28.6
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6      29.1
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3      29.8
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7      30.4
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0      30.7
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1      30.9
#> # ... with 998 more rows
FB_tbl_time %>% 
  pull(adjusted) %>% 
  .[1:5] %>% 
  mean(., na.rm = TRUE) # 正是`roll_mean`这一列的计算方法
#> [1] 28.602

# 计算2，3，4期的移动平均
rolling_mean_2 <- rollify(mean, window = 2)
rolling_mean_3 <- rollify(mean, window = 3)
rolling_mean_4 <- rollify(mean, window = 4)
FB %>% mutate(
  rm10 = rolling_mean_2(adjusted),
  rm20 = rolling_mean_3(adjusted),
  rm30 = rolling_mean_4(adjusted)
)

#> # A tibble: 1,008 x 11
#>    symb~ date        open  high   low close volume adju~  rm10  rm20  rm30
#>    <chr> <date>     <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 FB    2013-01-02  27.4  28.2  27.4  28.0 6.98e7  28.0  NA    NA    NA  
#>  2 FB    2013-01-03  27.9  28.5  27.6  27.8 6.31e7  27.8  27.9  NA    NA  
#>  3 FB    2013-01-04  28.0  28.9  27.8  28.8 7.27e7  28.8  28.3  28.2  NA  
#>  4 FB    2013-01-07  28.7  29.8  28.6  29.4 8.38e7  29.4  29.1  28.6  28.5
#>  5 FB    2013-01-08  29.5  29.6  28.9  29.1 4.59e7  29.1  29.2  29.1  28.8
#>  6 FB    2013-01-09  29.7  30.6  29.5  30.6 1.05e8  30.6  29.8  29.7  29.5
#>  7 FB    2013-01-10  30.6  31.5  30.3  31.3 9.53e7  31.3  30.9  30.3  30.1
#>  8 FB    2013-01-11  31.3  32.0  31.1  31.7 8.96e7  31.7  31.5  31.2  30.7
#>  9 FB    2013-01-14  32.1  32.2  30.6  31.0 9.89e7  31.0  31.3  31.3  31.1
#> 10 FB    2013-01-15  30.6  31.7  29.9  30.1 1.73e8  30.1  30.5  30.9  31.0
#> # ... with 998 more rows

# 开盘价和收盘价均价的5期移动平均值
rolling_avg_sum <- rollify(~ mean(.x + .y), window = 5)
mutate(FB_tbl_time, avg_sum = rolling_avg_sum(open, close))

#> # A time tibble: 1,008 x 9
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted avg_sum
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>   <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0    NA  
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8    NA  
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8    NA  
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4    NA  
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1    56.9
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6    57.9
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3    59.1
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7    60.4
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0    61.4
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1    61.8
#> # ... with 998 more rows

# 实现list-column
summary_df <- function(x) {
  data.frame(  
    rolled_summary_type = c("mean", "sd",  "min",  "max",  "median"),
    rolled_summary_val  = c(mean(x), sd(x), min(x), max(x), median(x))
  )
}
# A rolling version, with unlist = FALSE
rolling_summary <- rollify(~ summary_df(.x), window = 5, 
                           unlist = FALSE)
FB_summarised <- mutate(FB_tbl_time, summary_list_col = rolling_summary(adjusted))
FB_summarised

#> # A time tibble: 1,008 x 9
#> # Index: date
#>    symbol date        open  high   low close    volume adjusted summary_l~
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl> <list>    
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0 <lgl [1]> 
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8 <lgl [1]> 
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8 <lgl [1]> 
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4 <lgl [1]> 
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1 <data.fra~
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6 <data.fra~
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3 <data.fra~
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7 <data.fra~
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0 <data.fra~
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1 <data.fra~
#> # ... with 998 more rows

1
2
3

FB_summarised %>% 
  filter(!is.na(summary_list_col)) %>%
  tidyr::unnest()

#> # A time tibble: 5,020 x 10
#> # Index: date
#>    symbol date        open  high   low close    volume adjus~ roll~ rolle~
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>  <dbl> <fct>  <dbl>
#>  1 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 mean  28.6  
#>  2 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 sd     0.700
#>  3 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 min   27.8  
#>  4 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 max   29.4  
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 medi~ 28.8  
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 mean  29.1  
#>  7 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 sd     1.03 
#>  8 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 min   27.8  
#>  9 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 max   30.6  
#> 10 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 medi~ 29.1  
#> # ... with 5,010 more rows

# 计算移动平均后缺失值的处理问题
rolling_summary <- rollify(~summary_df(.x), window = 5, 
                           unlist = FALSE, na_value = data.frame())
FB_summarised <- mutate(FB, summary_list_col = rolling_summary(adjusted))
FB_summarised
#> # A tibble: 1,008 x 9
#>    symbol date        open  high   low close    volume adjusted summary_l~
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl> <list>    
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0 <data.fra~
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8 <data.fra~
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8 <data.fra~
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4 <data.fra~
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1 <data.fra~
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6 <data.fra~
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3 <data.fra~
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7 <data.fra~
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0 <data.fra~
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1 <data.fra~
#> # ... with 998 more rows
FB_summarised %>% 
  tidyr::unnest()
#> # A tibble: 5,020 x 10
#>    symbol date        open  high   low close    volume adjus~ roll~ rolle~
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>  <dbl> <fct>  <dbl>
#>  1 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 mean  28.6  
#>  2 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 sd     0.700
#>  3 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 min   27.8  
#>  4 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 max   29.4  
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300   29.1 medi~ 28.8  
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 mean  29.1  
#>  7 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 sd     1.03 
#>  8 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 min   27.8  
#>  9 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 max   30.6  
#> 10 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700   30.6 medi~ 29.1  
#> # ... with 5,010 more rows

创建`tbl_time`对象

create_series()函数

1 2	# 创建一整年的数据，每2天为一个间隔 create_series(~'2013', period = '2 day')

#> # A time tibble: 183 x 1
#> # Index: date
#>    date               
#>    <dttm>             
#>  1 2013-01-01 00:00:00
#>  2 2013-01-03 00:00:00
#>  3 2013-01-05 00:00:00
#>  4 2013-01-07 00:00:00
#>  5 2013-01-09 00:00:00
#>  6 2013-01-11 00:00:00
#>  7 2013-01-13 00:00:00
#>  8 2013-01-15 00:00:00
#>  9 2013-01-17 00:00:00
#> 10 2013-01-19 00:00:00
#> # ... with 173 more rows

1 2	# 2013年的日度数据 create_series(~'2013', period = 'daily')

#> # A time tibble: 365 x 1
#> # Index: date
#>    date               
#>    <dttm>             
#>  1 2013-01-01 00:00:00
#>  2 2013-01-02 00:00:00
#>  3 2013-01-03 00:00:00
#>  4 2013-01-04 00:00:00
#>  5 2013-01-05 00:00:00
#>  6 2013-01-06 00:00:00
#>  7 2013-01-07 00:00:00
#>  8 2013-01-08 00:00:00
#>  9 2013-01-09 00:00:00
#> 10 2013-01-10 00:00:00
#> # ... with 355 more rows

1 2	# 2013年的季度数据 create_series(~'2013', period = '1 quarter')

#> # A time tibble: 4 x 1
#> # Index: date
#>   date               
#>   <dttm>             
#> 1 2013-01-01 00:00:00
#> 2 2013-04-01 00:00:00
#> 3 2013-07-01 00:00:00
#> 4 2013-10-01 00:00:00

1 2	# 2013年至2015年的季度数据 create_series('2013' ~ '2015', period = '1 day')

#> # A time tibble: 1,095 x 1
#> # Index: date
#>    date               
#>    <dttm>             
#>  1 2013-01-01 00:00:00
#>  2 2013-01-02 00:00:00
#>  3 2013-01-03 00:00:00
#>  4 2013-01-04 00:00:00
#>  5 2013-01-05 00:00:00
#>  6 2013-01-06 00:00:00
#>  7 2013-01-07 00:00:00
#>  8 2013-01-08 00:00:00
#>  9 2013-01-09 00:00:00
#> 10 2013-01-10 00:00:00
#> # ... with 1,085 more rows

1 2	# 2012年1月至2012年2月每分钟的高频数据 create_series('2012-01' ~ '2012-02', period = '1 minute')

#> # A time tibble: 86,400 x 1
#> # Index: date
#>    date               
#>    <dttm>             
#>  1 2012-01-01 00:00:00
#>  2 2012-01-01 00:01:00
#>  3 2012-01-01 00:02:00
#>  4 2012-01-01 00:03:00
#>  5 2012-01-01 00:04:00
#>  6 2012-01-01 00:05:00
#>  7 2012-01-01 00:06:00
#>  8 2012-01-01 00:07:00
#>  9 2012-01-01 00:08:00
#> 10 2012-01-01 00:09:00
#> # ... with 86,390 more rows

1 2	# 2011年1月1日每秒钟的高频数据 create_series('2011-01-01 12:10:00' ~ '2011-01-01 12:12:00', period = '1 second')

#> # A time tibble: 121 x 1
#> # Index: date
#>    date               
#>    <dttm>             
#>  1 2011-01-01 12:10:00
#>  2 2011-01-01 12:10:01
#>  3 2011-01-01 12:10:02
#>  4 2011-01-01 12:10:03
#>  5 2011-01-01 12:10:04
#>  6 2011-01-01 12:10:05
#>  7 2011-01-01 12:10:06
#>  8 2011-01-01 12:10:07
#>  9 2011-01-01 12:10:08
#> 10 2011-01-01 12:10:09
#> # ... with 111 more rows

1 2	# 创建类型为`Date`的序列 create_series(~'2013', period = '1 day', class = "Date")

#> # A time tibble: 365 x 1
#> # Index: date
#>    date      
#>    <date>    
#>  1 2013-01-01
#>  2 2013-01-02
#>  3 2013-01-03
#>  4 2013-01-04
#>  5 2013-01-05
#>  6 2013-01-06
#>  7 2013-01-07
#>  8 2013-01-08
#>  9 2013-01-09
#> 10 2013-01-10
#> # ... with 355 more rows

1 2	# 创建类型为`yearmon`的序列 create_series(~'2013', period = '1 month', class = "yearmon")

#> # A time tibble: 12 x 1
#> # Index: date
#>    date         
#>    <S3: yearmon>
#>  1 1月 2013     
#>  2 2月 2013     
#>  3 3月 2013     
#>  4 4月 2013     
#>  5 5月 2013     
#>  6 6月 2013     
#>  7 7月 2013     
#>  8 8月 2013     
#>  9 9月 2013     
#> 10 10月 2013    
#> 11 11月 2013    
#> 12 12月 2013

1
2
3

# 创建类型为`hms`的序列
# `time_formula`的格式为`HH:MM:SS`
create_series(time_formula = '00:00:00' ~ '12:00:00', period = '1 second' , class = "hms")

#> # A time tibble: 43,201 x 1
#> # Index: date
#>    date  
#>    <time>
#>  1 00'00"
#>  2 00'01"
#>  3 00'02"
#>  4 00'03"
#>  5 00'04"
#>  6 00'05"
#>  7 00'06"
#>  8 00'07"
#>  9 00'08"
#> 10 00'09"
#> # ... with 43,191 more rows

分组操作

collapse_index()函数

1	collapse_index

#> function (index, period = "yearly", start_date = NULL, side = "end", 
#>     ...) 
#> {
#>     index_part <- partition_index(index, period, start_date)
#>     index_num <- to_posixct_numeric(index)
#>     if (side == "start") {
#>         pos <- match(unique(index_part), index_part)
#>         index_at_pos <- index_num[pos]
#>         reps <- diff(c(pos, length(index_part) + 1))
#>     }
#>     else if (side == "end") {
#>         pos <- length(index_part) - match(unique(index_part), 
#>             rev(index_part)) + 1
#>         index_at_pos <- index_num[pos]
#>         reps <- diff(c(0, pos))
#>     }
#>     new_index <- posixct_numeric_to_datetime(x = rep(index_at_pos, 
#>         reps), class = get_index_col_class(index), tz = get_index_col_time_zone(index))
#>     new_index
#> }
#> <environment: namespace:tibbletime>

# 计算每个月的均值
FB_tbl_time %>%
  select(-symbol) %>%
  mutate(date = collapse_index(index = date, period = "monthly", side = "end")) %>%
  group_by(date) %>%
  summarise_all(mean)
#> # A time tibble: 48 x 7
#> # Index: date
#>    date        open  high   low close   volume adjusted
#>    <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#>  1 2013-01-31  30.2  30.8  29.8  30.3 79802462     30.3
#>  2 2013-02-28  28.3  28.6  27.7  28.1 50402095     28.1
#>  3 2013-03-28  26.9  27.2  26.5  26.8 36359025     26.8
#>  4 2013-04-30  26.6  27.0  26.2  26.6 33568600     26.6
#>  5 2013-05-31  26.4  26.6  25.9  26.1 44640673     26.1
#>  6 2013-06-28  24.0  24.3  23.7  23.9 39416575     23.9
#>  7 2013-07-31  27.7  28.2  27.4  27.9 65364414     27.9
#>  8 2013-08-30  38.7  39.3  38.2  38.7 61136095     38.7
#>  9 2013-09-30  45.5  46.3  44.9  45.8 79154190     45.8
#> 10 2013-10-31  50.7  51.5  49.7  50.5 88375435     50.5
#> # ... with 38 more rows
# 等同于
FB_tbl_time %>%
  select(-symbol) %>%
  mutate(date = collapse_index(index = date, period = "1 month", side = "end")) %>%
  group_by(date) %>%
  summarise_all(mean)
#> # A time tibble: 48 x 7
#> # Index: date
#>    date        open  high   low close   volume adjusted
#>    <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
#>  1 2013-01-31  30.2  30.8  29.8  30.3 79802462     30.3
#>  2 2013-02-28  28.3  28.6  27.7  28.1 50402095     28.1
#>  3 2013-03-28  26.9  27.2  26.5  26.8 36359025     26.8
#>  4 2013-04-30  26.6  27.0  26.2  26.6 33568600     26.6
#>  5 2013-05-31  26.4  26.6  25.9  26.1 44640673     26.1
#>  6 2013-06-28  24.0  24.3  23.7  23.9 39416575     23.9
#>  7 2013-07-31  27.7  28.2  27.4  27.9 65364414     27.9
#>  8 2013-08-30  38.7  39.3  38.2  38.7 61136095     38.7
#>  9 2013-09-30  45.5  46.3  44.9  45.8 79154190     45.8
#> 10 2013-10-31  50.7  51.5  49.7  50.5 88375435     50.5
#> # ... with 38 more rows

来看下collpase_index()函数究竟做了什么：

FB_tbl_time %>%
  select(-symbol) %>%
  # 这里side = "start"/"end"，前者是该时间区间的第一个数，后者是最后一个数
  mutate(date = collapse_index(index = date, period = "monthly", side = "end"))

#> # A time tibble: 1,008 x 7
#> # Index: date
#>    date        open  high   low close    volume adjusted
#>    <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 2013-01-31  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 2013-01-31  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 2013-01-31  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 2013-01-31  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 2013-01-31  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 2013-01-31  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 2013-01-31  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 2013-01-31  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 2013-01-31  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 2013-01-31  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 998 more rows

很明显，collpase_index()函数创建了一个同FB_tbl_time行数相同的时间列，然后dplyr中的group_by()函数根据
这一列进行分组，最后通过聚合函数mean()计算每个月的均值。

再看一个例子

# Facebook, Amazon, Netflix and Google stocks
data(FANG)
FANG
#> # A tibble: 4,032 x 8
#>    symbol date        open  high   low close    volume adjusted
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1
#> # ... with 4,022 more rows
# 根据时间索引列和股票代码分组
FANG %>% 
  as_tbl_time(date) %>%
  group_by(symbol) %>%
  
  # 日度数据转化为年度数据
  mutate(date = collapse_index(date, period = "yearly")) %>%
  
  # Additionally group by date (yearly)
  group_by(date, add = TRUE) %>%
  
  # Perform a yearly summary for each symbol
  summarise(
    adj_min   = min(adjusted),
    adj_max   = max(adjusted),
    adj_range = adj_max - adj_min
  )
#> # A time tibble: 16 x 5
#> # Index:  date
#> # Groups: symbol [?]
#>    symbol date       adj_min adj_max adj_range
#>    <chr>  <date>       <dbl>   <dbl>     <dbl>
#>  1 AMZN   2013-12-31   248     404       156  
#>  2 AMZN   2014-12-31   287     407       120  
#>  3 AMZN   2015-12-31   287     694       407  
#>  4 AMZN   2016-12-30   482     844       362  
#>  5 FB     2013-12-31    22.9    58.0      35.1
#>  6 FB     2014-12-31    53.5    81.4      27.9
#>  7 FB     2015-12-31    74.1   109        35.0
#>  8 FB     2016-12-30    94.2   133        39.1
#>  9 GOOG   2013-12-31   351     560       209  
#> 10 GOOG   2014-12-31   495     609       114  
#> 11 GOOG   2015-12-31   493     777       284  
#> 12 GOOG   2016-12-30   668     813       145  
#> 13 NFLX   2013-12-31    13.1    54.4      41.2
#> 14 NFLX   2014-12-31    44.9    69.2      24.3
#> 15 NFLX   2015-12-31    45.5   131        85.4
#> 16 NFLX   2016-12-30    82.8   128        45.6

滚动回归

data("FB")
rolling_lm <- rollify(.f = function(close, high, low, volume) {
                              lm(close ~ high + low + volume)
                           }, 
                      window = 5, 
                      unlist = FALSE)  # 将回归结果存储为一个`S3`对象
FB_reg <- mutate(FB, roll_lm = rolling_lm(close, high, low, volume))
FB_reg

#> # A tibble: 1,008 x 9
#>    symbol date        open  high   low close    volume adjusted roll_lm  
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl> <list>   
#>  1 FB     2013-01-02  27.4  28.2  27.4  28.0  69846400     28.0 <lgl [1]>
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8 <lgl [1]>
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8 <lgl [1]>
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4 <lgl [1]>
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1 <S3: lm> 
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6 <S3: lm> 
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3 <S3: lm> 
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7 <S3: lm> 
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0 <S3: lm> 
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1 <S3: lm> 
#> # ... with 998 more rows

参考

tibbletime

tibbletime操作

将常规的数据框转化为tbl_time

对tbl_time对象切片

对分组后的tbl_time对象进行取子集操作