2017-01-24 8 views
1

私は、Microsoft SQL Serverの上方向にNULLを埋める方法を理解しようとしています。これはOracleでは非常にシンプルですが、上司はそれには向かなかったのです。 Google検索の6時間後、私はついに投稿することに決めました。結果セットでMicrosoft SQL Serverには、現在行っている作業よりも両方向にNULL値を入力する方が良い方法はありますか?

....

値は、すべての可能な日付時刻とのタイムスタンプのデータです。

VALUE2は、私たちの下限埋め込みメソッドです。

VALUE3は、NULLを上向きに入力する方法です。

この虚偽の不快な表があるとします。

-- Test Data & Table 
DECLARE @TEST001 TABLE 
(TIME_STAMP datetime, 
    TagA integer, 
    TagB integer, 
    TagC integer) 
-- Insert test Values  
INSERT INTO @TEST001 
VALUES 
    ('2017-01-21 00:01:00.042', NULL, NULL, 87), 
    ('2017-01-21 00:04:10.155', NULL, 1239, NULL), 
    ('2017-01-21 00:04:10.959', NULL, NULL, 86), 
    ('2017-01-21 00:06:49.401', NULL, 1240, NULL), 
    ('2017-01-21 00:06:59.301', NULL, 1239, NULL), 
    ('2017-01-21 00:07:10.124', 108, NULL, NULL), 
    ('2017-01-21 00:12:11.789', 109, NULL, NULL), 
    ('2017-01-21 00:16:12.190', 108, NULL, NULL), 
    ('2017-01-21 00:16:13.987', 107, NULL, NULL), 
    ('2017-01-21 00:17:31.410', NULL, 1260, NULL), 
    ('2017-01-21 00:17:32.511', NULL, 1261, 87), 
    ('2017-01-21 00:17:32.966', NULL, 1262, NULL) 

この偽装されたテーブルのTagAを照会するこのQueryを指定します。

-- Start of Query used in VBS ADODB CONN. 
declare @s datetime 
declare @e datetime 
set @s = '2017-01-21 00:00:00' 
set @e = '2017-01-21 23:59:59' 
; 
-- We need to get all intervals between our two dates. 
WITH ALL_INTERVALS AS (
    SELECT TOP (datediff(mi,@s,@e)) 
    TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s) 
     FROM sys.all_objects AS s1 
     CROSS JOIN 
     sys.all_objects AS s2 
), 
-- We need to include both our real data and all possible intervals. 
ALL_TIMES AS (
SELECT 
    Time_Stamp as TIMES, 
    TagA AS VALUE 
FROM @TEST001 H 
WHERE Time_Stamp BETWEEN @s and @e 
UNION ALL 
SELECT 
    TIMES AS TIMES, 
    NULL AS VALUE 
    FROM ALL_INTERVALS 
), 
-- We need to find the real first value and fill all nulls with this value until we hit a new value. 
FILL_DOWN AS (SELECT 
      TIMES, 
      VALUE, 
      ISNULL(VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES < AT.TIMES AND VALUE IS NOT NULL ORDER BY TIMES DESC)) AS VALUE2 
      FROM ALL_TIMES AT 
), 
-- Our fill up method does not work if our first set of values is null. UGH, this does not work either....crap. It fills our first set 
-- With our last real value.... 
FILL_UP AS ( 
     SELECT 
      TIMES, 
      VALUE, 
      VALUE2, 
      ISNULL(VALUE2, (SELECT TOP 1 VALUE2 FROM FILL_DOWN WHERE TIMES > FD.TIMES AND VALUE2 IS NOT NULL ORDER BY TIMES DESC)) AS VALUE3 
      FROM FILL_DOWN FD 
) 
SELECT * 
FROM FILL_UP 
ORDER BY TIMES ASC 

結果

TIMES     VALUE VALUE2 VALUE3 
2017-01-2100:01:00.000 NULL NULL 107 <---------- This should be 108 from our fake crappy table. 
2017-01-2100:01:00.043 NULL NULL 107 
2017-01-2100:02:00.000 NULL NULL 107 
2017-01-2100:03:00.000 NULL NULL 107 
2017-01-2100:04:00.000 NULL NULL 107 
2017-01-2100:04:10.157 NULL NULL 107 
2017-01-2100:04:10.960 NULL NULL 107 
2017-01-2100:05:00.000 NULL NULL 107 
2017-01-2100:06:00.000 NULL NULL 107 
2017-01-2100:06:49.400 NULL NULL 107 
2017-01-2100:06:59.300 NULL NULL 107 
2017-01-2100:07:00.000 NULL NULL 107 
2017-01-2100:07:10.123 108  108  108 
2017-01-2100:08:00.000 NULL 108  108 
2017-01-2100:09:00.000 NULL 108  108 
2017-01-2100:10:00.000 NULL 108  108 
2017-01-2100:11:00.000 NULL 108  108 
2017-01-2100:12:00.000 NULL 108  108 
2017-01-2100:12:11.790 109  109  109 
2017-01-2100:13:00.000 NULL 109  109 
2017-01-2100:14:00.000 NULL 109  109 
2017-01-2100:15:00.000 NULL 109  109 
2017-01-2100:16:00.000 NULL 109  109 
2017-01-2100:16:12.190 108  108  108 
2017-01-2100:16:13.987 107  107  107 
2017-01-2100:17:00.000 NULL 107  107 
2017-01-2100:17:31.410 NULL 107  107 
2017-01-2100:17:32.510 NULL 107  107 
2017-01-2100:17:32.967 NULL 107  107 
2017-01-2100:18:00.000 NULL 107  107 
2017-01-2100:19:00.000 NULL 107  107 
2017-01-2100:20:00.000 NULL 107  107 
2017-01-2100:21:00.000 NULL 107  107 
2017-01-2100:22:00.000 NULL 107  107 
2017-01-2100:23:00.000 NULL 107  107 
2017-01-2100:24:00.000 NULL 107  107 
2017-01-2100:25:00.000 NULL 107  107 
2017-01-2100:26:00.000 NULL 107  107 

EDIT:

今日は非常に大きなデータセットで遊んと、これが最速のものであることが判明。それはちょうどちょうど私がほしいと思うものを正確にする。この小さなテーブルでは速度の違いを見ることはできませんが、大きなデータセットでは1秒と24分が大変です!すべてを助けてくれてありがとう、最高の願い!

-- Test Data & Table 
DECLARE @TEST001 TABLE 
(TIME_STAMP datetime, 
    TagA integer, 
    TagB integer, 
    TagC integer) 
-- Insert test Values  
INSERT INTO @TEST001 
VALUES 
    ('2017-01-21 00:01:00.042', NULL, NULL, 87), 
    ('2017-01-21 00:04:10.155', NULL, 1239, NULL), 
    ('2017-01-21 00:04:10.959', NULL, NULL, 86), 
    ('2017-01-21 00:06:49.401', NULL, 1240, NULL), 
    ('2017-01-21 00:06:59.301', NULL, 1239, NULL), 
    ('2017-01-21 00:07:10.124', 108, NULL, NULL), 
    ('2017-01-21 00:12:11.789', 109, NULL, NULL), 
    ('2017-01-21 00:16:12.190', 108, NULL, NULL), 
    ('2017-01-21 00:16:13.987', 107, NULL, NULL), 
    ('2017-01-21 00:17:31.410', NULL, 1260, NULL), 
    ('2017-01-21 00:17:32.511', NULL, 1261, 87), 
    ('2017-01-21 00:17:32.966', NULL, 1262, NULL) 
-- Start of Query used in VBS ADODB CONNN 
declare @s datetime 
declare @e datetime 
set @s = '2017-01-01 00:00:00' 
set @e = '2017-01-31 23:59:59' 
; 
WITH ALL_INTERVALS 
AS (SELECT TOP (datediff(mi,@s,@e)) 
    TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s), 
    NULL AS VALUE 
     FROM sys.all_objects AS s1 
     CROSS JOIN 
     sys.all_objects AS s2 
), 
ALL_TIMES 
AS (SELECT 
    H.TIME_STAMP as TIMES, 
    TagA AS VALUE 
    FROM @TEST001 H 
    WHERE TIME_STAMP BETWEEN @s and @e 
    UNION ALL 
    SELECT 
     AI.TIMES AS TIMES, 
     AI.VALUE AS VALUE 
     FROM ALL_INTERVALS AI 
), 
-- JUST INCASE OUR REAL TIME == INTERVAL TIME exactly. 
FILL_ACCROSS AS(SELECT TIMES AS TIMES, 
       ISNULL(AT.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES = AT.TIMES AND VALUE IS NOT NULL ORDER BY TIMES ASC)) AS VALUE 
       FROM ALL_TIMES AT 

), 
-- FILL UP AND FILL DOWN. 
FILL_UP_DOWN AS (SELECT 
       TIMES, 
       VALUE, 
       ISNULL(FA.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES > FA.TIMES AND VALUE IS NOT NULL ORDER BY TIMES ASC)) AS VALUE2, 
       ISNULL(FA.VALUE, (SELECT TOP 1 VALUE FROM ALL_TIMES WHERE TIMES < FA.TIMES AND VALUE IS NOT NULL ORDER BY TIMES DESC)) AS VALUE3 
       FROM FILL_ACCROSS FA 
) 
--Just a nice display of what is going on. 
select FD.TIMES, 
     FD.VALUE, 
     FD.VALUE2, 
     FD.VALUE3, 
     CASE 
      WHEN FD.VALUE3 IS NULL THEN (FD.VALUE2) 
      ELSE (FD.VALUE3) 
      END AS VALUE4 
    FROM FILL_UP_DOWN FD order by TIMES ASC 
+3

グループこれを試してみてください。サンプルデータを短くして、望ましい結果を追加できますか? –

+0

相関サブクエリで順序を変更して 'DESC'から' ASC'に 'VALUE3'を取得すると、望みの結果が得られます。現在、指定された日付の後に最後の非NULL値を探していますが、その外観では最初の非NULL値が必要です。 – GarethD

+0

FILL_DOWN順にドロップDESC私が沈静化し、スニッカーズを食べたが、あなたは@GarethDとSergありがとう、それはまだ実行に5秒かかりますが、少なくともそれは働きます!私はDESCを落とそうとしたことを誓っていますが、遅くて私はまっすぐ見ることができません。 – Serg

答えて

1

あなたは、代替を探しているなら、私は本当にあなたが何をしたいのか理解していない

-- Start of Query used in VBS ADODB CONN. 
declare @s datetime 
declare @e datetime 
set @s = '2017-01-21 00:00:00.001' 
set @e = '2017-01-21 23:59:59' 
; 
--select datepart(millisecond,@s); 
-- We need to get all intervals between our two dates. 
WITH ALL_INTERVALS AS (
    SELECT TOP (datediff(mi,@s,@e)) 
    TIMES = dateadd(mi,CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id])),@s) 
     FROM sys.all_objects AS s1 
     CROSS JOIN 
     sys.all_objects AS s2 
), 
-- We need to include both our real data and all possible intervals. 
ALL_TIMES AS (
SELECT 
    Time_Stamp as TIMES, 
    TagA AS VALUE 
FROM @TEST001 H 
WHERE Time_Stamp BETWEEN @s and @e 
UNION ALL 
SELECT 
    TIMES AS TIMES, 
    NULL AS VALUE 
    FROM ALL_INTERVALS 
), 
g1 AS ( 
    SELECT 
     TIMES, 
     VALUE, 
     diff = case 
       when value=lag(value,1) over(order by TIMES) 
        or coalesce (value, lag(value,1) over(order by TIMES)) is null 
       then 0 else 1 end 
    FROM ALL_TIMES AT 
), 
g2 as ( 
    select TIMES, VALUE 
     , grp = sum(diff) over(order by TIMES) 
     , direction = case sum(diff) over(order by TIMES) when 0 then 1 else -1 end 
    from g1 
) 
select 
    TIMES, 
    VALUE, 
    grp, 
    ISNULL(VALUE, 
      (SELECT TOP 1 VALUE FROM g2 repl WHERE grp = g2.grp + g2.direction AND VALUE IS NOT NULL ORDER BY TIMES) 
      ) AS VALUE3 
from g2; 
+0

素晴らしい、私は違ったアプローチが大好きです、ありがとう!大きなデータセットを使用すると、元のクエリと同じ速度になることがわかりました。しかし、それを一日中遊んだ後、私は上記の編集を思いついた。大規模なデータセットを完成させるのには、ほとんどの時間がかかります。乾杯!注文を落としたり変更したりする必要性を指摘してくれてありがとう。私はその日とてもうまくやっておらず、それを理解できませんでした。 –

関連する問題