[R] Help with loop for column means into new column by a subset Factor w/131 levels

Bill Poling B|||@Po||ng @end|ng |rom ze||@@com
Tue Apr 30 20:45:40 CEST 2019


I ran this routine but I was thinking there must be a more elegant way of doing this.


#https://community.rstudio.com/t/how-to-average-mean-variables-in-r-based-on-the-level-of-another-variable-and-save-this-as-a-new-variable/8764/8

hcd2tmp2_summmary <- hcd2tmp2 %>%
  select(.) %>%
  group_by(Procedure_Code1) %>%
  summarize(average = mean(Allowed_Amt))
# A tibble: 131 x 2
# Procedure_Code1 average
# <fct>             <dbl>
# 1 A9606            57785.
# 2 J0129             5420.
# 3 J0178             4700.
# 4 J0180            13392.
# 5 J0202            56328.
# 6 J0256            17366.
# 7 J0257             7563.
# 8 J0485             2450.
# 9 J0490             6398.
# 10 J0585            4492.
# ... with 121 more rows

hcd2tmp2 <- hcd2tmp %>%
  group_by(Procedure_Code1) %>%
  summarise(Avg_Allowed_Amt = mean(Allowed_Amt))

view(hcd2tmp2)


hcd2tmp3 <- hcd2tmp %>%
  group_by(Procedure_Code1) %>%
  summarise(Avg_AllowByLimit = mean(AllowByLimit))

view(hcd2tmp3)


hcd2tmp4 <- hcd2tmp %>%
  group_by(Procedure_Code1) %>%
  summarise(Avg_UnitsByDose = mean(UnitsByDose))

view(hcd2tmp4)

hcd2tmp5 <- hcd2tmp %>%
  group_by(Procedure_Code1) %>%
  summarise(Avg_LimitByUnits = mean(LimitByUnits))

view(hcd2tmp5)

#Joins----


hcd2tmp <- left_join(hcd2tmp2, hcd2tmp, by = c("Procedure_Code1"="Procedure_Code1"))
hcd2tmp <- left_join(hcd2tmp3, hcd2tmp, by = c("Procedure_Code1"="Procedure_Code1"))
hcd2tmp <- left_join(hcd2tmp4, hcd2tmp, by = c("Procedure_Code1"="Procedure_Code1"))
hcd2tmp <- left_join(hcd2tmp5, hcd2tmp, by = c("Procedure_Code1"="Procedure_Code1"))

view(hcd2tmp)

hcd2tmp$Avg_LimitByUnits <- round(hcd2tmp$Avg_LimitByUnits, digits = 2)
hcd2tmp$Avg_Allowed_Amt <- round(hcd2tmp$Avg_Allowed_Amt, digits = 2)
hcd2tmp$Avg_AllowByLimit <- round(hcd2tmp$Avg_AllowByLimit, digits = 2)
hcd2tmp$Avg_UnitsByDose <- round(hcd2tmp$Avg_UnitsByDose, digits = 2)

view(hcd2tmp)

#Over under columns----
hcd2tmp$AllowByLimitFlag <- hcd2tmp$AllowByLimit > hcd2tmp$Avg_AllowByLimit
hcd2tmp$LimitByUnitsFlag <- hcd2tmp$LimitByUnits > hcd2tmp$Avg_LimitByUnits
hcd2tmp$Allowed_AmtFlag  <- hcd2tmp$Allowed_Amt  > hcd2tmp$Avg_Allowed_Amt
hcd2tmp$UnitsByDoseFlag  <- hcd2tmp$UnitsByDose  > hcd2tmp$Avg_UnitsByDose

view(hcd2tmp)


-----Original Message-----
From: Bill Poling
Sent: Tuesday, April 30, 2019 12:51 PM
To: r-help (r-help using r-project.org) <r-help using r-project.org>
Cc: Bill Poling <Bill.Poling using zelis.com>
Subject: Help with loop for column means into new column by a subset Factor w/131 levels

Good afternoon.

#RStudio Version 1.1.456
sessionInfo()
#R version 3.5.3 (2019-03-11)
#Platform: x86_64-w64-mingw32/x64 (64-bit) #Running under: Windows >= 8 x64 (build 9200)



#I have a DF of 8 columns and 14025 rows

str(hcd2tmp2)

# 'data.frame':14025 obs. of  8 variables:
# $ Submitted_Charge: num  21021 15360 40561 29495 7904 ...
# $ Allowed_Amt     : num  18393 6254 40561 29495 7904 ...
# $ Submitted_Units : num  60 240 420 45 120 215 215 15 57 2 ...
# $ Procedure_Code1 : Factor w/ 131 levels "A9606","J0129",..: 43 113 117 125 24 85 85 90 86 25 ...
# $ AllowByLimit    : num  4.268 0.949 7.913 6.124 3.524 ...
# $ UnitsByDose     : num  600 240 420 450 120 215 215 750 570 500 ...
# $ LimitByUnits    : num  4310 6591 5126 4816 2243 ...
# $ HCPCSCodeDose1  : num  10 1 1 10 1 1 1 50 10 250 ...

#I would like to create four additional columns that are the mean of four current columns in the DF.
#Current columns
#Allowed_Amt
#LimitByUnits
#AllowByLimit
#UnitsByDose

#The goal is to be able to identify rows where (for instance) Allowed_Amt is greater than the average (aka outliers).

#The trick Is I want the means of those columns based on a Factor value
#The Factor is:
#Procedure_Code1 : Factor w/ 131 levels "A9606","J0129"

#So each of my four new columns will have 131 distinct values based on the mean for the specific Procedure_Code1 grouping

#In SQL it would look something like this:

#SELECT *,
# NewCol1 = mean(Allowed_Amt) OVER (PARTITION BY Procedure_Code1),
# NewCol2 = mean(LimitByUnits) OVER (PARTITION BY Procedure_Code1),
# NewCol3 = mean(AllowByLimit) OVER (PARTITION BY Procedure_Code1),
# NewCol4 = mean(UnitsByDose) OVER (PARTITION BY Procedure_Code1)
#INTO NewTable
#FROM Oldtable

#Here are some sample data

head(hcd2tmp2, n=40)
#      Submitted_Charge Allowed_Amt Submitted_Units Procedure_Code1 AllowByLimit UnitsByDose LimitByUnits HCPCSCodeDose1
# 1          21020.70    18393.12              60           J1745    4.2679810         600      4309.56             10
# 2          15360.00     6254.40             240           J9299    0.9488785         240      6591.36              1
# 3          40561.32    40561.32             420           J9306    7.9133539         420      5125.68              1
# 4          29495.25    29495.25              45           J9355    6.1244417         450      4815.99             10
# 5           7904.30     7904.30             120           J0897    3.5243000         120      2242.80              1
# 6          15331.95    10614.31             215           J9034    2.0586686         215      5155.91              1
# 7          15331.95    10614.31             215           J9034    2.0586686         215      5155.91              1
# 8            461.90        0.00              15           J9045    0.0000000         750        46.38             50
# 9          27340.96    15092.21              57           J9035    3.2600227         570      4629.48             10
# 10           768.00      576.00               2           J1190    1.3617343         500       422.99            250
# 11           101.00       38.38               5           J2250   59.9687500           5         0.64              1
# 12         17458.40        0.00             200           J9033    0.0000000         200      5990.00              1
# 13          7885.10     7569.70               1           J1745  105.3835445          10        71.83             10
# 14          2015.00     1155.78               4           J2785    5.0051100           0       230.92              0
# 15           443.72      443.72              12           J9045   11.9601078         600        37.10             50
# 16        113750.00   113750.00             600           J2350    3.3025003         600     34443.60              1
# 17          3582.85     3582.85              10           J2469   30.5573561         250       117.25             25
# 18          5152.65     5152.65              50           J2796    1.4362988         500      3587.45             10
# 19          5152.65     5152.65              50           J2796    1.4362988         500      3587.45             10
# 20         39664.09        0.00              74           J9355    0.0000000         740      7919.63             10
# 21           166.71      102.53               9           J9045    3.6841538         450        27.83             50
# 22         13823.61     9676.53               1           J2505    2.0785247           6      4655.48              6
# 23         90954.00    26436.53             360           J1786    1.7443775        3600     15155.28             10
# 24          4800.00     3494.40             800           J3262    0.8861838         800      3943.20              1
# 25           216.00      105.84               4           J0696   42.3360000        1000         2.50            250
# 26          5300.00     4770.00               1           J0178    4.9677151           1       960.20              1
# 27         35203.00    35203.00             200           J9271    3.5772498         200      9840.80              1
# 28         17589.15    17589.15             300           J3380    2.9696855         300      5922.90              1
# 29         18394.64    17842.79               1           J9355  166.7238834          10       107.02             10
# 30           770.00      731.50              10           J2469    6.2388060         250       117.25             25
# 31           461.90        0.00              15           J9045    0.0000000         750        46.38             50
# 32          8160.00     3342.40              80           J1459    1.0260818       40000      3257.44            500
# 33          1653.48      314.16               6           J9305    0.7661505          60       410.05             10
# 34         13036.50        0.00             194           J9034    0.0000000         194      4652.31              1
# 35         10486.87        0.00             156           J9034    0.0000000         156      3741.04              1
# 36         15360.00     6254.40             240           J9299    0.9488785         240      6591.36              1
# 37          1616.83     1616.83             150           J1453    5.2528590         150       307.80              1
# 38         80685.74    34772.43              96           J9035    4.4597077         960      7797.02             10
# 39         85220.58    35925.13             287           J9299    4.5577715         287      7882.17              1
# 40          3860.17     1627.27              13           J9299    4.5577963          13       357.03              1


#I hope this is enough inforamtion to warrant your support
#Thank you
#WHP



Confidentiality Notice This message is sent from Zelis. ...{{dropped:13}}



More information about the R-help mailing list