[R] Help with loop for column means into new column by a subset Factor w/131 levels
Bill Poling
B|||@Po||ng @end|ng |rom ze||@@com
Tue Apr 30 20:45:40 CEST 2019
I ran this routine but I was thinking there must be a more elegant way of doing this.
#https://community.rstudio.com/t/how-to-average-mean-variables-in-r-based-on-the-level-of-another-variable-and-save-this-as-a-new-variable/8764/8
hcd2tmp2_summmary <- hcd2tmp2 %>%
select(.) %>%
group_by(Procedure_Code1) %>%
summarize(average = mean(Allowed_Amt))
# A tibble: 131 x 2
# Procedure_Code1 average
# <fct> <dbl>
# 1 A9606 57785.
# 2 J0129 5420.
# 3 J0178 4700.
# 4 J0180 13392.
# 5 J0202 56328.
# 6 J0256 17366.
# 7 J0257 7563.
# 8 J0485 2450.
# 9 J0490 6398.
# 10 J0585 4492.
# ... with 121 more rows
hcd2tmp2 <- hcd2tmp %>%
group_by(Procedure_Code1) %>%
summarise(Avg_Allowed_Amt = mean(Allowed_Amt))
view(hcd2tmp2)
hcd2tmp3 <- hcd2tmp %>%
group_by(Procedure_Code1) %>%
summarise(Avg_AllowByLimit = mean(AllowByLimit))
view(hcd2tmp3)
hcd2tmp4 <- hcd2tmp %>%
group_by(Procedure_Code1) %>%
summarise(Avg_UnitsByDose = mean(UnitsByDose))
view(hcd2tmp4)
hcd2tmp5 <- hcd2tmp %>%
group_by(Procedure_Code1) %>%
summarise(Avg_LimitByUnits = mean(LimitByUnits))
view(hcd2tmp5)
#Joins----
hcd2tmp <- left_join(hcd2tmp2, hcd2tmp, by = c("Procedure_Code1"="Procedure_Code1"))
hcd2tmp <- left_join(hcd2tmp3, hcd2tmp, by = c("Procedure_Code1"="Procedure_Code1"))
hcd2tmp <- left_join(hcd2tmp4, hcd2tmp, by = c("Procedure_Code1"="Procedure_Code1"))
hcd2tmp <- left_join(hcd2tmp5, hcd2tmp, by = c("Procedure_Code1"="Procedure_Code1"))
view(hcd2tmp)
hcd2tmp$Avg_LimitByUnits <- round(hcd2tmp$Avg_LimitByUnits, digits = 2)
hcd2tmp$Avg_Allowed_Amt <- round(hcd2tmp$Avg_Allowed_Amt, digits = 2)
hcd2tmp$Avg_AllowByLimit <- round(hcd2tmp$Avg_AllowByLimit, digits = 2)
hcd2tmp$Avg_UnitsByDose <- round(hcd2tmp$Avg_UnitsByDose, digits = 2)
view(hcd2tmp)
#Over under columns----
hcd2tmp$AllowByLimitFlag <- hcd2tmp$AllowByLimit > hcd2tmp$Avg_AllowByLimit
hcd2tmp$LimitByUnitsFlag <- hcd2tmp$LimitByUnits > hcd2tmp$Avg_LimitByUnits
hcd2tmp$Allowed_AmtFlag <- hcd2tmp$Allowed_Amt > hcd2tmp$Avg_Allowed_Amt
hcd2tmp$UnitsByDoseFlag <- hcd2tmp$UnitsByDose > hcd2tmp$Avg_UnitsByDose
view(hcd2tmp)
-----Original Message-----
From: Bill Poling
Sent: Tuesday, April 30, 2019 12:51 PM
To: r-help (r-help using r-project.org) <r-help using r-project.org>
Cc: Bill Poling <Bill.Poling using zelis.com>
Subject: Help with loop for column means into new column by a subset Factor w/131 levels
Good afternoon.
#RStudio Version 1.1.456
sessionInfo()
#R version 3.5.3 (2019-03-11)
#Platform: x86_64-w64-mingw32/x64 (64-bit) #Running under: Windows >= 8 x64 (build 9200)
#I have a DF of 8 columns and 14025 rows
str(hcd2tmp2)
# 'data.frame':14025 obs. of 8 variables:
# $ Submitted_Charge: num 21021 15360 40561 29495 7904 ...
# $ Allowed_Amt : num 18393 6254 40561 29495 7904 ...
# $ Submitted_Units : num 60 240 420 45 120 215 215 15 57 2 ...
# $ Procedure_Code1 : Factor w/ 131 levels "A9606","J0129",..: 43 113 117 125 24 85 85 90 86 25 ...
# $ AllowByLimit : num 4.268 0.949 7.913 6.124 3.524 ...
# $ UnitsByDose : num 600 240 420 450 120 215 215 750 570 500 ...
# $ LimitByUnits : num 4310 6591 5126 4816 2243 ...
# $ HCPCSCodeDose1 : num 10 1 1 10 1 1 1 50 10 250 ...
#I would like to create four additional columns that are the mean of four current columns in the DF.
#Current columns
#Allowed_Amt
#LimitByUnits
#AllowByLimit
#UnitsByDose
#The goal is to be able to identify rows where (for instance) Allowed_Amt is greater than the average (aka outliers).
#The trick Is I want the means of those columns based on a Factor value
#The Factor is:
#Procedure_Code1 : Factor w/ 131 levels "A9606","J0129"
#So each of my four new columns will have 131 distinct values based on the mean for the specific Procedure_Code1 grouping
#In SQL it would look something like this:
#SELECT *,
# NewCol1 = mean(Allowed_Amt) OVER (PARTITION BY Procedure_Code1),
# NewCol2 = mean(LimitByUnits) OVER (PARTITION BY Procedure_Code1),
# NewCol3 = mean(AllowByLimit) OVER (PARTITION BY Procedure_Code1),
# NewCol4 = mean(UnitsByDose) OVER (PARTITION BY Procedure_Code1)
#INTO NewTable
#FROM Oldtable
#Here are some sample data
head(hcd2tmp2, n=40)
# Submitted_Charge Allowed_Amt Submitted_Units Procedure_Code1 AllowByLimit UnitsByDose LimitByUnits HCPCSCodeDose1
# 1 21020.70 18393.12 60 J1745 4.2679810 600 4309.56 10
# 2 15360.00 6254.40 240 J9299 0.9488785 240 6591.36 1
# 3 40561.32 40561.32 420 J9306 7.9133539 420 5125.68 1
# 4 29495.25 29495.25 45 J9355 6.1244417 450 4815.99 10
# 5 7904.30 7904.30 120 J0897 3.5243000 120 2242.80 1
# 6 15331.95 10614.31 215 J9034 2.0586686 215 5155.91 1
# 7 15331.95 10614.31 215 J9034 2.0586686 215 5155.91 1
# 8 461.90 0.00 15 J9045 0.0000000 750 46.38 50
# 9 27340.96 15092.21 57 J9035 3.2600227 570 4629.48 10
# 10 768.00 576.00 2 J1190 1.3617343 500 422.99 250
# 11 101.00 38.38 5 J2250 59.9687500 5 0.64 1
# 12 17458.40 0.00 200 J9033 0.0000000 200 5990.00 1
# 13 7885.10 7569.70 1 J1745 105.3835445 10 71.83 10
# 14 2015.00 1155.78 4 J2785 5.0051100 0 230.92 0
# 15 443.72 443.72 12 J9045 11.9601078 600 37.10 50
# 16 113750.00 113750.00 600 J2350 3.3025003 600 34443.60 1
# 17 3582.85 3582.85 10 J2469 30.5573561 250 117.25 25
# 18 5152.65 5152.65 50 J2796 1.4362988 500 3587.45 10
# 19 5152.65 5152.65 50 J2796 1.4362988 500 3587.45 10
# 20 39664.09 0.00 74 J9355 0.0000000 740 7919.63 10
# 21 166.71 102.53 9 J9045 3.6841538 450 27.83 50
# 22 13823.61 9676.53 1 J2505 2.0785247 6 4655.48 6
# 23 90954.00 26436.53 360 J1786 1.7443775 3600 15155.28 10
# 24 4800.00 3494.40 800 J3262 0.8861838 800 3943.20 1
# 25 216.00 105.84 4 J0696 42.3360000 1000 2.50 250
# 26 5300.00 4770.00 1 J0178 4.9677151 1 960.20 1
# 27 35203.00 35203.00 200 J9271 3.5772498 200 9840.80 1
# 28 17589.15 17589.15 300 J3380 2.9696855 300 5922.90 1
# 29 18394.64 17842.79 1 J9355 166.7238834 10 107.02 10
# 30 770.00 731.50 10 J2469 6.2388060 250 117.25 25
# 31 461.90 0.00 15 J9045 0.0000000 750 46.38 50
# 32 8160.00 3342.40 80 J1459 1.0260818 40000 3257.44 500
# 33 1653.48 314.16 6 J9305 0.7661505 60 410.05 10
# 34 13036.50 0.00 194 J9034 0.0000000 194 4652.31 1
# 35 10486.87 0.00 156 J9034 0.0000000 156 3741.04 1
# 36 15360.00 6254.40 240 J9299 0.9488785 240 6591.36 1
# 37 1616.83 1616.83 150 J1453 5.2528590 150 307.80 1
# 38 80685.74 34772.43 96 J9035 4.4597077 960 7797.02 10
# 39 85220.58 35925.13 287 J9299 4.5577715 287 7882.17 1
# 40 3860.17 1627.27 13 J9299 4.5577963 13 357.03 1
#I hope this is enough inforamtion to warrant your support
#Thank you
#WHP
Confidentiality Notice This message is sent from Zelis. ...{{dropped:13}}
More information about the R-help
mailing list