Apachedruid计算常见指标

编程

Druid 支持SQL

[ EXPLAIN PLAN FOR ]

[ WITH tableName [ ( column1, column2, ... ) ] AS ( query ) ]

SELECT [ ALL | DISTINCT ] { * | exprs }

FROM table

[ WHERE expr ]

[ GROUP BY exprs ]

[ HAVING expr ]

[ ORDER BY expr [ ASC | DESC ], expr [ ASC | DESC ], ... ]

[ LIMIT limit ]

[ UNION ALL <another query> ]

持续时间数据类型

持续时间数据类型用于指定时间间隔。

时间间隔以以下格式“ PnYnMnDTnHnMnS”指定,其中:

  • P表示周期(必填)

  • nY表示年数

  • nM表示月数

  • nD表示天数

  • T表示时间段的开始(如果要指定小时,分钟或秒,则为必填)

  • nH表示小时数

  • nM表示分钟数

  • nS表示秒数

  • CURRENT_TIMESTAMP:连接时区中的当前时间戳。

  • CURRENT_DATE:连接时区中的当前日期。

  • INTERVAL:

**事件选择:**能选择任意传来的事件查看相应数据

  • 次数(pv):用户做这个事的次数, > [ count() ]

  • 人数(uv):用户做这个事的人数, > [?]

  • 人均次数(pv/uv):做这个事的人,每人平均多少次,也可以用组合事件查(举例:event_total_video_play_success: 激励视频总计-播放成功)>[次数/人数]

  • 全活跃用户人均次数:全部活跃人数,平均每人多少次,也可以用组合事件查

    • 主要可以来查人均播放了多少广告等数据

    [怎样的算活跃用户]

  • 按……求和:某个参数的数字求和,比较常用的是统计时长

    • 可以通过求和playTime,来查时长,注意去掉异常值,查某个游戏的活跃、时长、留存
    • play_game: 玩游戏的时间,按playtime(毫秒)求和

      • playtime > 2000 (每次时长大于2s才统计)
      • playtime < 2160000(小于异常值)

  • 按……求平均值:不常用

  • 按……求人均值:不常用

  • 按……求分位数:

  • 按...去重数

    • 对事件参数进行去重统计,可以用这个来查我们平台每天拉起的游戏数有多少个,查平台的拉起的总游戏数---【按...去重count(distinct)】
    • play_game,按packageName去重数

  • 按……和用户去重

    • 对事件参数和做过该事件的用户进行去重统计,平台内玩游戏A的人有100人,游戏B的人有200人,游戏C的人有300人,则这种方式查出来的数是100+200+300,利用这个可以查某个版本/实验的人均游戏个数
    • play_game,按packageName和用户去重

  • 按……查看分布

    • 可以看某个事件的事件发生频次查某个事件的次数分布
    • event_scratcher_show_reward_dialog: 互动游戏普通奖励弹窗弹出

**新增指标和组合指标:**能够几个指标进行加减乘除运算

数据查询:

  • 时间间隔: (月、周、天、小时、5分钟)

  • 数据查询方案(sql + json),

    • 查询事件:( play_game:通过我玩过的直接拉起游戏)

指标

  • 次数(pv)

    play_game事件的总次数

-- SQL 

SELECT count(*) as "PV"

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY

// json

{

"queryType":"groupBy",

"dataSource":"currency_record",

"granularity":"hour",

"aggregations":[

{

"type":"count",

"name":"count"

}

],

"intervals":[

"2000-01-01T00:00Z/3000-01-01T00:00Z"

]

}

  • 人数(uv)

-- SQL

SELECT count(DISTINCT "android_id") as "UV"

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY

// json

{

"queryType":"groupBy",

"dataSource":"currency_record",

"granularity":"day",

"aggregations":[

{

"type":"count",

"name":"count"

}

],

"intervals":[

"2000-01-01T00:00Z/3000-01-01T00:00Z"

]

}

  • 人均次数(pv/uv)

-- sql

SELECT SUM("recordtype") as "sum_recordtype", COUNT(DISTINCT "imei" ) as "imei_count", TRUNCATE(SUM("recordtype")/(COUNT(DISTINCT "imei" )*1.0), 2) as "avg_recordtype"

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY

{ 

// 查询每天的UV .51s .30s .25s 平均 .353s

// -- 查询类型对应聚合类型有3中类型值,

// -- timeseries topN groupBy 如果对于单个维度和指标进行分组聚合计算,推荐使用topN查询,

// -- 能够获得更高的性能,分组适合多维度,多指标聚合查询

// -- timeseries 时间序列查询,对于指定的时间段按照查询规则返回聚合后的结果集,

// -- 查询规则中可以设置查询力度,结果排序方式以及过滤条件,过滤条件可以使用嵌套过滤,并且支持后聚合

"queryType":"timeseries",

"dataSource":"currency_record",

"granularity":"day",

"aggregations":[

{

"type":"count",

"name":"pv",

"fieldName":"uuid",

},

{

// thetaSketch

"type":"thetaSketch",

"name":"uv",

"fieldName":"uuid",

}

],

"postAggregations": [

{

"type": "arithmetic",

"name": "click_rate",

"fn": "/",

"fields": [

{

"type": "hyperUniqueCardinality",

"name": "click_cnt",

"fieldName": "pv"

},

{

// 如果type使用fieldAccess 这个的话会造成报错,提示说,不能转为Number

"type": "hyperUniqueCardinality",

"name": "pv_cnt",

"fieldName": "uv"

},

],

}

]

"intervals":[

"2000-01-01T00:00Z/3000-01-01T00:00Z"

]

}

  • 按……求和

-- sql

SELECT SUM("recordtype")

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY AND "recordtype" <= 40 AND "recordtype" > 25

// json

{

"queryType":"timeseries",

"dataSource":"currency_record",

"granularity":"day",

"aggregations":[

{

"type":"count",

"name":"recordtype",

"fieldName":"recordtype",

},

],

"filter":

{

"type": "bound",

"dimension": "recordtype",

"lower": 25,

"upper": 40

"alphaNumeric": true

},

"intervals":[

"2000-01-01T00:00Z/3000-01-01T00:00Z"

]

}

  • 按……求平均值

-- sql

SELECT SUM("recordtype") as "sum_recordtype", COUNT(*) as "count", AVG("recordtype") as "avg_recordtype"

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY AND "recordtype" <= 40 AND "recordtype" > 25

// json

{

"queryType":"timeseries",

"dataSource":"currency_record",

"granularity":"month",

"aggregations":[

{

"type":"longSum",

"name":"recordtype",

"fieldName":"recordtype",

},

{

"type":"count",

"name":"pv",

"fieldName":"uuid",

}

],

"postAggregations": [

{

"type": "arithmetic",

"name": "avg",

"fn": "/",

"fields": [

{

"type": "hyperUniqueCardinality",

"name": "click_cnt",

"fieldName": "recordtype"

},

{

// 如果type使用fieldAccess 这个的话会造成报错,提示说,不能转为Number

"type": "hyperUniqueCardinality",

"name": "pv_cnt",

"fieldName": "pv"

},

],

}

]

"filter":

{

"type": "bound",

"dimension": "recordtype",

"lower": 25,

"upper": 40

"alphaNumeric": true

},

"intervals":[

"2000-01-01T00:00Z/3000-01-01T00:00Z"

]

}

  • 按……求人均值

-- SQL

SELECT SUM("recordtype") as "sum_recordtype", COUNT(DISTINCT "imei" ) as "imei_count", SUM("recordtype")/COUNT(DISTINCT "imei" ) as "avg_recordtype"

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY AND "recordtype" <= 40 AND "recordtype" > 25

// JSON

{

"queryType":"timeseries",

"dataSource":"currency_record",

"granularity":"month",

"aggregations":[

{

"type":"longSum",

"name":"recordtype",

"fieldName":"recordtype",

},

{

"type":"thetaSketch",

"name":"imeiCount",

"fieldName":"imei",

}

],

"postAggregations": [

{

"type": "arithmetic",

"name": "avg",

"fn": "/",

"fields": [

{

"type": "hyperUniqueCardinality",

"name": "click_cnt",

"fieldName": "recordtype"

},

{

// 如果type使用fieldAccess 这个的话会造成报错,提示说,不能转为Number

"type": "hyperUniqueCardinality",

"name": "pv_cnt",

"fieldName": "imeiCount"

},

],

}

]

"filter":

{

"type": "bound",

"dimension": "recordtype",

"lower": 25,

"upper": 40

"alphaNumeric": true

},

"intervals":[

"2000-01-01T00:00Z/3000-01-01T00:00Z"

]

}

  • 按……求分位数

-- SQL

SELECT "Time", sum("=1") AS "=1" ,sum("2~3") AS "2~3" ,sum("4~8") AS "4~8", sum("9~16") AS "9~16", sum("17~25") AS "17~25"

from (

SELECT

"Time",

case WHEN "Count1" = 1 then 1 ELSE 0 end as "=1",

case WHEN "Count1" > 1 and "Count1" >= 3 then 1 ELSE 0 end as "2~3",

case WHEN "Count1" > 3 and "Count1" <= 8 then 1 ELSE 0 end as "4~8",

case WHEN "Count1" > 8 and "Count1" <= 16 then 1 ELSE 0 end as "9~16",

case WHEN "Count1" > 16 and "Count1" <= 25 then 1 ELSE 0 end as "17~25",

case WHEN "Count1" > 25 then 1 ELSE 0 end as ">25"

from (

SELECT

TIME_FLOOR("__time", "PT5M") AS "Time", "uuid", COUNT(*) AS "Count1"

FROM "app_time"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" HOUR AND "__time" <= CURRENT_TIMESTAMP

GROUP BY 1, 2

ORDER BY "Time" ASC ))

GROUP BY 1

ORDER BY "Time"

// JSON

类型转换

  • 求人群分类

SELECT SUM("recordtype") as "sum_recordtype", COUNT(DISTINCT "imei" ) as "imei_count", TRUNCATE(SUM("recordtype")/(COUNT(DISTINCT "imei" )*1.0), 2) as "avg_recordtype"

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY AND "uuid" IN (SELECT DISTINCT "uuid"

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY AND "uuid" LIKE "%b31%")

  • 按.... 查看分布

SELECT "Time", SUM("A"), SUM("B"), SUM("C"), SUM("D")

FROM (

SELECT

"Time",

(case when "Count" <= 2 then 1 ELSE 0 end) as A,

(case when "Count" > 2 AND "Count" <= 6 then 1 ELSE 0 end) as B,

(case when "Count" > 6 AND "Count" <= 10 then 1 ELSE 0 end) as C,

(case when "Count" > 10 then 1 ELSE 0 end) as D

FROM (

SELECT

TIME_FLOOR("__time", "PT5M") AS "Time", "uuid", COUNT(*) AS "Count"

FROM "app_time"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" HOUR AND "__time" <= CURRENT_TIMESTAMP

GROUP BY 1, "uuid"

) )

GROUP BY 1

ORDER BY "Time"

SELECT count(*) as "PV" ,  count(DISTINCT "android_id") as "UV", count(*)/count(DISTINCT "android_id") as "PV/UV"

FROM "currency_record"

WHERE "__time" >= CURRENT_TIMESTAMP - INTERVAL "1" DAY

以上是 Apachedruid计算常见指标 的全部内容, 来源链接: utcz.com/z/511291.html

回到顶部