使用SQL计算协方差

# dt---------indx_nm1-----indx_val1-------indx_nm2------indx_val2 

2009-06-08----ABQI------1001.2------------ACNACTR----------300.05

2009-06-09----ABQI------1002.12 ----------ACNACTR----------341.19

2009-06-10----ABQI------1011.4------------ACNACTR----------382.93

2009-06-11----ABQI------1015.43 ----------ACNACTR----------362.63

我有一张类似上面这样的表(但有数百行,数据从2009年到2013年)。是否有一种方法可以用SQL计算协方差:按 dt 把 indx_val1 和 indx_val2 逐行配对(遍历整张表),对每一行计算 [(indx_val1 - avg(indx_val1)) * (indx_val2 - avg(indx_val2))],求和后除以总行数,最终只为 COV(ABQI, ACNACTR) 返回一个简单的值?

回答:

由于你的聚合需要作用在两种不同的分组上,因此需要两个不同的查询。主查询按日期(dt)分组,得到每一行的值;另一个查询则对整个行集执行 AVG() 和 COUNT() 聚合。

要同时使用这两个查询,需要把它们 JOIN 在一起。但是,由于这两个查询之间没有实际的关联条件,结果是一个笛卡尔积,所以我们使用 CROSS JOIN。实际效果是:主查询的每一行都与聚合查询返回的那一行连接起来。然后,你就可以在 SELECT 列表中利用两边的值进行运算:

因此,在你上一个问题的查询基础上构建:

/*
 * Covariance of the ABQI and ACNACTR index values in table1.
 *
 * table1 is read as one row per (dt, indx_nm) with the value in indx_val.
 * The rows are pivoted to one row per date, and only dates on which BOTH
 * indexes are present take part in the calculation.
 *
 * Output, one row per paired date, ordered by dt:
 *   dt, indx_nm1, indx_val1, indx_nm2, indx_val2
 *   cv          this date's contribution to the covariance:
 *               (indx_val1 - avg1) * (indx_val2 - avg2) / number_of_pairs
 *   covariance  the sum of cv over all dates, i.e. the population
 *               covariance COV(ABQI, ACNACTR); identical on every row.
 *
 * The averages and the row count are taken from the paired rows. Counting
 * the raw table1 rows instead would count every date twice (once per index)
 * and halve the result.
 */
WITH pivoted AS (
    -- One row per date with the two indexes side by side.
    SELECT
        dt,
        MAX(CASE WHEN indx_nm = 'ABQI' THEN indx_nm END) AS indx_nm1,
        MAX(CASE WHEN indx_nm = 'ABQI' THEN indx_val END) AS indx_val1,
        MAX(CASE WHEN indx_nm = 'ACNACTR' THEN indx_nm END) AS indx_nm2,
        MAX(CASE WHEN indx_nm = 'ACNACTR' THEN indx_val END) AS indx_val2
    FROM table1
    WHERE indx_nm IN ('ABQI', 'ACNACTR')
    GROUP BY dt
),
paired AS (
    -- Keep only the dates on which both indexes were reported.
    SELECT
        dt,
        indx_nm1,
        indx_val1,
        indx_nm2,
        indx_val2
    FROM pivoted
    WHERE indx_nm1 IS NOT NULL
      AND indx_nm2 IS NOT NULL
)
SELECT
    p.dt,
    p.indx_nm1,
    p.indx_val1,
    p.indx_nm2,
    p.indx_val2,
    ((p.indx_val2 - aggs.indx_val2_avg) * (p.indx_val1 - aggs.indx_val1_avg))
        / aggs.total_rows AS cv,
    -- Window sum over the whole result set: the single covariance value.
    SUM(
        ((p.indx_val2 - aggs.indx_val2_avg) * (p.indx_val1 - aggs.indx_val1_avg))
            / aggs.total_rows
    ) OVER () AS covariance
FROM paired AS p
CROSS JOIN (
    /* Single-row aggregate over the paired dates; there is no join key,
       so the CROSS JOIN simply attaches it to every row of the main query. */
    SELECT
        AVG(indx_val1) AS indx_val1_avg,
        AVG(indx_val2) AS indx_val2_avg,
        COUNT(*) AS total_rows
    FROM paired
) AS aggs
ORDER BY p.dt;

这里有一个演示,基于你之前的那个示例:http://sqlfiddle.com/#!6/2ec65/14

回答:

下面是一个标量值函数,可对格式化为XML的任意两组数值序列进行协方差计算。

测试方法:先编译该函数,然后执行函数注释中的 alpha 测试脚本。

/*
 * dbo.Covariance
 *
 * Returns the population covariance (sum of the products of the deviations
 * from the means, divided by n) of two numeric series supplied as one XML
 * value.
 *
 * Parameter:
 *   @XmlTwoValueSeries  XML fragment made of <Output> elements, each with a
 *                       <col0> child (x) and a <col1> child (y). This is the
 *                       shape produced by
 *                       SELECT col0, col1 ... FOR XML PATH('Output').
 *
 * Returns:
 *   float  the covariance of the (x, y) pairs, or NULL when the input holds
 *          no complete pair. <Output> elements missing either value are
 *          left out of the means and of n alike.
 *
 * ALPHA TEST (run after the function has been created):
 *
 *   DECLARE @XmlTwoValueSeries xml;
 *   SET @XmlTwoValueSeries = (
 *       SELECT col0, col1
 *       FROM (
 *           SELECT 1.24 AS col0, 2.20 AS col1
 *           UNION ALL SELECT 1.6, 3.20
 *           UNION ALL SELECT 1.0, 2.77
 *           UNION ALL SELECT 1.9, 2.98
 *       ) AS series
 *       FOR XML PATH('Output')
 *   );
 *   SELECT dbo.Covariance(@XmlTwoValueSeries) AS Covariance;  -- 0.0699375
 */
CREATE FUNCTION [dbo].[Covariance](@XmlTwoValueSeries xml)
RETURNS float
AS
BEGIN
    -- float, matching the declared return type: a fixed-scale numeric local
    -- would round the result and overflow on large covariances.
    DECLARE @covariance float;

    SELECT @covariance =
        SUM((x - x_avg) * (y - y_avg)) / NULLIF(COUNT(*), 0)
    FROM (
        -- Attach the series means to every complete pair.
        SELECT
            x,
            y,
            AVG(x) OVER () AS x_avg,
            AVG(y) OVER () AS y_avg
        FROM (
            -- Shred the XML: one row per <Output> element.
            SELECT
                e.c.value('(col0/text())[1]', 'float') AS x,
                e.c.value('(col1/text())[1]', 'float') AS y
            FROM @XmlTwoValueSeries.nodes('Output') AS e(c)
        ) AS pairs
        -- The filter runs before the window averages, so incomplete pairs
        -- affect neither the means nor the divisor.
        WHERE x IS NOT NULL
          AND y IS NOT NULL
    ) AS centered;

    RETURN @covariance;
END
GO

以上是 使用SQL查找协变性 的全部内容, 来源链接: utcz.com/qa/266933.html

回到顶部