[R] Strange variable names in factor regression

Naresh Gurbuxani n@re@h_gurbux@n| @end|ng |rom hotm@||@com
Thu May 9 14:09:03 CEST 2024


After converting a character variable to an ordered factor, the regression
coefficients have strange names. Is it possible to obtain the same variable
names with and without an intercept?

Thanks,
Naresh

# Build the example data: one row per calendar day of Q1 2024, labelled with
# its abbreviated weekday name (as returned by weekdays() in the current
# locale; the "Sat"/"Sun" tests below assume English abbreviations).
mydf <- data.frame(
  date = seq.Date(as.Date("2024-01-01"), as.Date("2024-03-31"), by = 1)
)
mydf$wday <- weekdays(mydf$date, abbreviate = TRUE)

# Separate working days from weekend days so that each group can be given
# its own simulated volume distribution.
mydf.work <- mydf[!(mydf$wday %in% c("Sat", "Sun")), ]
mydf.weekend <- mydf[mydf$wday %in% c("Sat", "Sun"), ]

# Simulated volumes, rounded to whole numbers: working days are drawn around
# 20, weekend days around 10, both with a standard deviation of 5.
# Working days are drawn first, so the order of these two calls matters for
# the random stream.
mydf.work$volume <- round(rnorm(nrow(mydf.work), mean = 20, sd = 5))
mydf.weekend$volume <- round(rnorm(nrow(mydf.weekend), mean = 10, sd = 5))

# Stack the two groups back together: working days first, then weekends.
mydf <- rbind(mydf.work, mydf.weekend)

# Fit volume on weekday WITH an intercept while wday is still a plain
# character column. The output below has no wdayFri term: Fri is absorbed
# into the intercept and the other coefficients are offsets from it.
reg <- lm(volume ~ wday, data = mydf)
## Variable names as expected
coef(reg)
(Intercept) wdayMon wdaySat wdaySun wdayThu wdayTue
21.3846154 1.3076923 -12.0000000 -12.9230769 -1.9230769 -0.6923077
wdayWed
-1.6153846

# Same model WITHOUT an intercept (`- 1`): the output below shows one
# coefficient per weekday, named wday<level>, in alphabetical order.
reg <- lm(volume ~ wday - 1, data = mydf)
# Variable names as expected
coef(reg)
wdayFri wdayMon wdaySat wdaySun wdayThu wdayTue wdayWed
21.384615 22.692308 9.384615 8.461538 19.461538 20.692308 19.769231

# Convert wday from character to an ORDERED factor so that its levels follow
# the calendar sequence Mon..Sun rather than alphabetical order.
mydf$wday <- factor(mydf$wday, levels = c("Mon", "Tue", "Wed", "Thu",
"Fri", "Sat", "Sun"), ordered = TRUE)

# No-intercept fit on the ordered factor: the output below still uses
# wday<level> names, now listed in Mon..Sun order.
reg <- lm(volume ~ wday - 1, data = mydf)
# Variable names as expected
coef(reg)
wdayMon wdayTue wdayWed wdayThu wdayFri wdaySat wdaySun
22.692308 20.692308 19.769231 19.461538 21.384615 9.384615 8.461538

# Intercept fit on the ordered factor: the coefficient names in the output
# below switch from wday<level> to wday.L, wday.Q, wday.C, wday^4, ...
# NOTE(review): these suffixes match R's default polynomial contrasts for
# ordered factors (contr.poly: linear, quadratic, cubic, 4th power, ...);
# see ?contr.poly and the `contrasts` entry in ?options. Passing
# contrasts = list(wday = "contr.treatment") to lm() should restore
# level-based names -- confirm before relying on it.
reg <- lm(volume ~ wday, data = mydf)
# Strange variable names
coef(reg)
(Intercept) wday.L wday.Q wday.C wday^4 wday^5
17.406593 -12.036715 -4.968654 -1.852819 3.291477 4.263642
wday^6
2.591317



More information about the R-help mailing list