在自引用表中获取平均薪资的SQL

4

我看到了很多关于如何递归地查询自引用表的问题/答案,但是我很难将我找到的答案应用到每个父级、祖父级等等,而不管该项在层次结构中的位置。

需要获取每个部门的平均薪资,包括层次结构。 这意味着部门应该包括每个子部门等的平均工资。

我有一个nex数据库模式:

CREATE TABLE Employee
    (
      Id INT NOT NULL ,
      Name VARCHAR(200) NOT NULL ,
      Department_Id INT NOT NULL ,
      PRIMARY KEY ( Id )
    ); 

CREATE TABLE Department
    (
      Id INT NOT NULL ,
      DepartmentName VARCHAR(200) NOT NULL ,
      Parent_Id INT ,
      PRIMARY KEY ( Id )
    );

CREATE TABLE Salary
    (
      Id INT NOT NULL ,
      Date DATETIME NOT NULL ,
      Amount INT NOT NULL ,
      Employee_Id INT NOT NULL ,
      PRIMARY KEY ( Id )
    );

我尝试了类似的方法,但它只包括层级结构的第一层。

SELECT  d.Id ,
        d.DepartmentName ,
        ( SELECT    AVG(s.Amount)
                FROM      dbo.Department dd
                        LEFT JOIN dbo.Department sdd ON dd.Id = sdd.Parent_Id 
                        JOIN dbo.Employee e ON e.Department_Id = sdd.Id
                                                OR e.Department_Id = dd.Id
                        JOIN dbo.Salary s ON s.Employee_Id = e.Id
                                                WHERE dd.Id = d.Id
        ) AS avg_dep_salary 
FROM    dbo.Department d
WHERE   d.Parent_Id IS NULL;

如何获得所有级别的平均工资?
编辑:添加了一些插入内容。
INSERT  INTO Employee
        ( Id, Name, Department_Id )
VALUES  ( 1, 'Peter', 1 ),
        ( 2, 'Alex', 1 ),
        ( 3, 'Sam', 2 ),
        ( 4, 'James', 2 ),
        ( 5, 'Anna', 3 ),
        ( 6, 'Susan', 3 ),
        ( 7, 'Abby', 4 ),
        ( 8, 'Endy', 4 );

INSERT  INTO Department
        ( Id, DepartmentName, Parent_Id )
VALUES  ( 1, 'IT', NULL ),
        ( 2, 'HR', NULL),
        ( 3, 'SubIT', 1 ),
        ( 4, 'SubSubIT', 3 );

INSERT  INTO Salary
        ( Id, Date, Amount, Employee_Id )
VALUES  ( 1, '2013-01-09 16:03:50.003', 3000, 1 ),
        ( 2, '2013-01-11 16:03:50.003', 5000, 2 ),
        ( 3, '2013-01-09 16:03:50.003', 2000, 3 ),
        ( 4, '2013-01-11 16:03:50.003', 1000, 4 ),
        ( 5, '2013-01-09 16:03:50.003', 4000, 5 ),
        ( 6, '2013-01-11 16:03:50.003', 6000, 6 ),
        ( 7, '2013-01-09 16:03:50.003', 7000, 7 ),
        ( 8, '2013-01-13 16:03:50.003', 9000, 8 );

预期结果是:

Department | Average_Salary
__________________________________
IT         |  ( X1 + X2 + X3 ) / 3
HR         |  ( Y1 ) / 1
SubIT      |  ( X2 + X3 ) / 2
SubSubIT   |  ( X3 ) / 1

当:

  • X1 - IT部门的平均工资
  • X2 - 子IT部门的平均工资
  • X3 - 孙子IT部门的平均工资
  • Y1 - 人力资源部门的平均工资

1
很好,你已经包含了创建语句,能否再添加一些插入样本数据的语句,并提供该样本集的预期结果? - HoneyBadger
你尝试过将结果合并后再求平均值吗? - happymacarts
递归CTE可能会对此有所帮助。 - Anton
按照您的要求,我已经添加了插入并提供了期望的结果 :) - Yuriy Oleynik
这个准确吗 - IT | (X1 + X2 + X3) / 3 - 难道不应该根据部门规模进行加权吗? - Ctznkane525
3个回答

3

样例数据

我已经添加了几行具有更宽的树形结构。

DECLARE @Employee TABLE
(
Id INT NOT NULL ,
Name VARCHAR(200) NOT NULL ,
Department_Id INT NOT NULL ,
PRIMARY KEY ( Id )
); 

DECLARE @Department TABLE
(
Id INT NOT NULL ,
DepartmentName VARCHAR(200) NOT NULL ,
Parent_Id INT ,
PRIMARY KEY ( Id )
);

DECLARE @Salary TABLE
(
Id INT NOT NULL ,
Date DATETIME NOT NULL ,
Amount INT NOT NULL ,
Employee_Id INT NOT NULL ,
PRIMARY KEY ( Id )
);

INSERT  INTO @Employee
( Id, Name, Department_Id )
VALUES  
( 1, 'Peter', 1 ),
( 2, 'Alex',  1 ),
( 3, 'Sam',   2 ),
( 4, 'James', 2 ),
( 5, 'Anna',  3 ),
( 6, 'Susan', 3 ),
( 7, 'Abby',  4 ),
( 8, 'Endy',  4 ),
(10, 'e_A',   10),
(11, 'e_AB',  11),
(12, 'e_AC',  12),
(13, 'e_AD',  13),
(14, 'e_ACE', 14),
(15, 'e_ACF', 15),
(16, 'e_ACG', 16);

INSERT  INTO @Department
( Id, DepartmentName, Parent_Id )
VALUES  
( 1, 'IT', NULL ),
( 2, 'HR', NULL),
( 3, 'SubIT', 1 ),
( 4, 'SubSubIT', 3 ),
(10, 'A', NULL ),
(11, 'AB', 10),
(12, 'AC', 10),
(13, 'AD', 10),
(14, 'ACE', 12),
(15, 'ACF', 12),
(16, 'ACG', 12);

INSERT  INTO @Salary
( Id, Date, Amount, Employee_Id )
VALUES  
( 1, '2013-01-09 16:03:50.003', 3000, 1 ),
( 2, '2013-01-11 16:03:50.003', 5000, 2 ),
( 3, '2013-01-09 16:03:50.003', 2000, 3 ),
( 4, '2013-01-11 16:03:50.003', 1000, 4 ),
( 5, '2013-01-09 16:03:50.003', 4000, 5 ),
( 6, '2013-01-11 16:03:50.003', 6000, 6 ),
( 7, '2013-01-09 16:03:50.003', 7000, 7 ),
( 8, '2013-01-13 16:03:50.003', 9000, 8 ),
(10, '2013-01-13 16:03:50', 100, 10),
(11, '2013-01-13 16:03:50', 100, 11),
(12, '2013-01-13 16:03:50', 100, 12),
(13, '2013-01-13 16:03:50', 100, 13),
(14, '2013-01-13 16:03:50', 100, 14),
(15, '2013-01-13 16:03:50', 100, 15),
(16, '2013-01-13 16:03:50', 100, 16);

查询

WITH
CTE_Departments
AS
(
    SELECT
        D.Id
        ,D.Parent_Id
        ,D.DepartmentName
        ,SUM(Amount) AS DepartmentAmount
        ,COUNT(*) AS DepartmentCount
    FROM
        @Department AS D
        INNER JOIN @Employee AS E ON E.Department_Id = D.Id
        INNER JOIN @Salary AS S ON S.Employee_Id = E.Id
    GROUP BY
        D.Id
        ,D.Parent_Id
        ,D.DepartmentName
)
,CTE_Recursive
AS
(
    SELECT
         CTE_Departments.Id AS OriginalID
        ,CTE_Departments.DepartmentName AS OriginalName
        ,CTE_Departments.Id
        ,CTE_Departments.Parent_Id
        ,CTE_Departments.DepartmentName
        ,CTE_Departments.DepartmentAmount
        ,CTE_Departments.DepartmentCount
        ,1 AS Lvl
    FROM CTE_Departments

    UNION ALL

    SELECT
         CTE_Recursive.OriginalID
        ,CTE_Recursive.OriginalName
        ,CTE_Departments.Id
        ,CTE_Departments.Parent_Id
        ,CTE_Departments.DepartmentName
        ,CTE_Departments.DepartmentAmount
        ,CTE_Departments.DepartmentCount
        ,CTE_Recursive.Lvl + 1 AS Lvl
    FROM
        CTE_Departments
        INNER JOIN CTE_Recursive ON CTE_Recursive.Id = CTE_Departments.Parent_Id
)
SELECT
    OriginalID
    ,OriginalName
    ,SUM(DepartmentAmount) AS SumAmount
    ,SUM(DepartmentCount) AS SumCount
    ,SUM(DepartmentAmount) / SUM(DepartmentCount) AS AvgAmount
FROM CTE_Recursive
GROUP BY
    OriginalID
    ,OriginalName
ORDER BY OriginalID
;

结果

+------------+--------------+-----------+----------+-----------+
| OriginalID | OriginalName | SumAmount | SumCount | AvgAmount |
+------------+--------------+-----------+----------+-----------+
|          1 | IT           |     34000 |        6 |      5666 |
|          2 | HR           |      3000 |        2 |      1500 |
|          3 | SubIT        |     26000 |        4 |      6500 |
|          4 | SubSubIT     |     16000 |        2 |      8000 |
|         10 | A            |       700 |        7 |       100 |
|         11 | AB           |       100 |        1 |       100 |
|         12 | AC           |       400 |        4 |       100 |
|         13 | AD           |       100 |        1 |       100 |
|         14 | ACE          |       100 |        1 |       100 |
|         15 | ACF          |       100 |        1 |       100 |
|         16 | ACG          |       100 |        1 |       100 |
+------------+--------------+-----------+----------+-----------+

逐步运行查询,逐个CTE地理解其工作原理。

CTE_Departments 为每个部门提供总人数和人数。

CTE_Recursive 递归生成每个部门的子行,同时保留OriginalID - 递归开始的部门的ID。

最终查询简单地通过这个OriginalID对所有内容进行分组。


1
这是一种方法。
with avg_per_dep as (
    select
        [Month] = eomonth(s.date), d.Id, d.DepartmentName
        , avgDep = avg(s.Amount * 1.0)
    from 
        Salary s
        join Employee e on s.Employee_Id = e.Id
        join Department d on e.Department_Id = d.Id
    group by d.Id, d.DepartmentName, eomonth(s.date)
)
, rcte as (
    select
        i = Id, Id
        , list = cast(',' + cast(Id as varchar(10)) + ',' as varchar(max))
        , step = 1
    from 
        Department
    union all
    select
        a.i, b.Id, cast(a.list + cast(b.Id as varchar(10)) + ',' as varchar(max))
        , step + 1
    from
        rcte a
        join Department b on a.Id = b.Parent_Id
)
select
    d.DepartmentName, c.[Month]
    , Average_Salary = avg(c.avgDep)
from
    (
        select
            top 1 with ties i, list 
        from 
            rcte
        order by row_number() over (partition by i order by step desc)
    ) t
    join avg_per_dep c on t.list like '%,' + cast(c.Id as varchar(10)) + ',%'
    join Department d on t.i = d.Id
group by t.i, d.DepartmentName, c.[Month]

输出

DepartmentName    [Month]       Average_Salary
---------------------------------------------
IT                2013-01-31    5666.666666
HR                2013-01-31    1500.000000
SubIT             2013-01-31    6500.000000
SubSubIT          2013-01-31    8000.000000

想法:

  1. 计算每个部门的平均工资
  2. 使用递归CTE获取所有子部门的部门列表。
  3. 连接两个表并计算其子部门的平均值

1
你也可以使用以下查询来获取期望的结果。
WITH Department_Path 
     AS (SELECT Id, CAST(CONCAT('@', Id, '@') AS VARCHAR(255)) AS Path 
         FROM   Department 
         WHERE  Parent_Id IS NULL 
         UNION ALL 
         SELECT Child.Id, CAST(CONCAT(Parent.Path, Child.Id, '@') AS VARCHAR(255)) AS Path 
         FROM   Department Child 
                INNER JOIN Department_Path Parent 
                        ON Parent.Id = Child.Parent_Id) 
SELECT Department.Id, 
       Department.DepartmentName, 
       AVG(Salary.Amount) As Average_Salary, 
       COUNT(Employee.Id) AS Employee_Count 
FROM   Department 
       INNER JOIN Department_Path 
               ON CHARINDEX(CONCAT('@', Department.Id, '@'), Department_Path.Path) > 0 
       INNER JOIN Employee 
               ON Employee.Department_Id = Department_Path.Id 
       INNER JOIN Salary 
               ON Salary.Employee_Id = Employee.Id 
GROUP  BY Department.Id, 
          Department.DepartmentName; 

每个员工都属于一个部门的层级列表。对于每个部门,我们可以检索属于该部门的所有员工,然后计算平均薪资。

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接