[R] Plotting factors in graph panel
Anupam Tyagi
anuptyagi using gmail.com
Thu Jul 6 11:41:35 CEST 2023
Hi John:
Thanks! Below is the data using your suggestion. I used "ggplot" to make a
graph. I am not too happy with it. I am looking for something simpler and
cleaner. Plot is attached.
I also tried "lattice" package, but nothing got plotted with "xyplot"
command, because it is looking for a numeric variable on x-axis.
ggplot(TrialData4, aes(x=Income, y=Percent, group=Measure)) + geom_point() +
geom_line() + facet_wrap(~Measure) + theme_classic()
> dput(TrialData4)
structure(list(Income = c("$10", "$25", "$40", "$75", "> $75",
"$10", "$25", "$40", "$75", "> $75", "$10", "$25", "$40", "$75",
"> $75", "$10", "$25", "$40", "$75", "> $75", "$10", "$25", "$40",
"$75", "> $75", "$10", "$25", "$40", "$75", "> $75", "$10", "$25",
"$40", "$75", "> $75", "$10", "$25", "$40", "$75", "> $75", "$10",
"$25", "$40", "$75", "> $75", "$10", "$25", "$40", "$75", "> $75",
"$10", "$25", "$40", "$75", "> $75", "$10", "$25", "$40", "$75",
"> $75", "$10", "$25", "$40", "$75", "> $75", "$10", "$25", "$40",
"$75", "> $75", "$10", "$25", "$40", "$75", "> $75", "$10", "$25",
"$40", "$75", "> $75", "$10", "$25", "$40", "$75", "> $75", "$10",
"$25", "$40", "$75", "> $75", "$10", "$25", "$40", "$75", "> $75",
"$10", "$25", "$40", "$75", "> $75", "$10", "$25", "$40", "$75",
"> $75", "$10", "$25", "$40", "$75", "> $75", "$10", "$25", "$40",
"$75", "> $75", "$10", "$25", "$40", "$75", "> $75", "$10", "$25",
"$40", "$75", "> $75", "$10", "$25", "$40", "$75", "> $75", "$10",
"$25", "$40", "$75", "> $75", "$10", "$25", "$40", "$75", "> $75"
), Percent = c(3.052, 2.292, 2.244, 1.706, 1.297, 29.76, 28.79,
29.51, 28.9, 31.67, 31.18, 32.64, 34.31, 35.65, 37.59, 36, 36.27,
33.94, 33.74, 29.44, 46.54, 54.01, 59.1, 62.17, 67.67, 24.75,
24.4, 25, 24.61, 24.02, 25.4, 18.7, 29, 11.48, 7.103, 3.052,
2.292, 2.244, 1.706, 1.297, 29.76, 28.79, 29.51, 28.9, 31.67,
31.18, 32.64, 34.31, 35.65, 37.59, 36, 36.27, 33.94, 33.74, 29.44,
46.54, 54.01, 59.1, 62.17, 67.67, 24.75, 24.4, 25, 24.61, 24.02,
25.4, 18.7, 29, 11.48, 7.103, 3.052, 2.292, 2.244, 1.706, 1.297,
29.76, 28.79, 29.51, 28.9, 31.67, 31.18, 32.64, 34.31, 35.65,
37.59, 36, 36.27, 33.94, 33.74, 29.44, 46.54, 54.01, 59.1, 62.17,
67.67, 24.75, 24.4, 25, 24.61, 24.02, 25.4, 18.7, 29, 11.48,
7.103, 3.052, 2.292, 2.244, 1.706, 1.297, 29.76, 28.79, 29.51,
28.9, 31.67, 31.18, 32.64, 34.31, 35.65, 37.59, 36, 36.27, 33.94,
33.74, 29.44, 46.54, 54.01, 59.1, 62.17, 67.67, 24.75, 24.4,
25, 24.61, 24.02, 25.4, 18.7, 29, 11.48, 7.103), Measure = c("MF None",
"MF None", "MF None", "MF None", "MF None", "MF Equity", "MF Equity",
"MF Equity", "MF Equity", "MF Equity", "MF Debt", "MF Debt",
"MF Debt", "MF Debt", "MF Debt", "MF Hybrid", "MF Hybrid", "MF Hybrid",
"MF Hybrid", "MF Hybrid", "Bank None", "Bank None", "Bank None",
"Bank None", "Bank None", "Bank Current", "Bank Current", "Bank Current",
"Bank Current", "Bank Current", "Bank Savings", "Bank Savings",
"Bank Savings", "Bank Savings", "Bank Savings", "MF None 1",
"MF None 1", "MF None 1", "MF None 1", "MF None 1", "MF Equity 1",
"MF Equity 1", "MF Equity 1", "MF Equity 1", "MF Equity 1", "MF Debt 1",
"MF Debt 1", "MF Debt 1", "MF Debt 1", "MF Debt 1", "MF Hybrid 1",
"MF Hybrid 1", "MF Hybrid 1", "MF Hybrid 1", "MF Hybrid 1", "Bank None 1",
"Bank None 1", "Bank None 1", "Bank None 1", "Bank None 1", "Bank Current 1",
"Bank Current 1", "Bank Current 1", "Bank Current 1", "Bank Current 1",
"Bank Savings 1", "Bank Savings 1", "Bank Savings 1", "Bank Savings 1",
"Bank Savings 1", "MF None 2", "MF None 2", "MF None 2", "MF None 2",
"MF None 2", "MF Equity 2", "MF Equity 2", "MF Equity 2", "MF Equity 2",
"MF Equity 2", "MF Debt 2", "MF Debt 2", "MF Debt 2", "MF Debt 2",
"MF Debt 2", "MF Hybrid 2", "MF Hybrid 2", "MF Hybrid 2", "MF Hybrid 2",
"MF Hybrid 2", "Bank None 2", "Bank None 2", "Bank None 2", "Bank None 2",
"Bank None 2", "Bank Current 2", "Bank Current 2", "Bank Current 2",
"Bank Current 2", "Bank Current 2", "Bank Savings 2", "Bank Savings 2",
"Bank Savings 2", "Bank Savings 2", "Bank Savings 2", "MF None 3",
"MF None 3", "MF None 3", "MF None 3", "MF None 3", "MF Equity 3",
"MF Equity 3", "MF Equity 3", "MF Equity 3", "MF Equity 3", "MF Debt 3",
"MF Debt 3", "MF Debt 3", "MF Debt 3", "MF Debt 3", "MF Hybrid 3",
"MF Hybrid 3", "MF Hybrid 3", "MF Hybrid 3", "MF Hybrid 3", "Bank None 3",
"Bank None 3", "Bank None 3", "Bank None 3", "Bank None 3", "Bank Current 3",
"Bank Current 3", "Bank Current 3", "Bank Current 3", "Bank Current 3",
"Bank Savings 3", "Bank Savings 3", "Bank Savings 3", "Bank Savings 3",
"Bank Savings 3")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-140L))
On Thu, 29 Jun 2023 at 21:11, John Kane <jrkrideau using gmail.com> wrote:
> Anupam,
>
> I think your best bet with your data would be to tidy it up in Excel,
> read it into R using something like the readxl package and then supply
> some sample data using the dput() function.
>
> In the case of a large dataset something like dput(head(mydata, 100))
> should supply the data we need. Just do dput(mydata) where *mydata* is your
> data. Copy the output and paste it here.
>
> On Thu, 29 Jun 2023 at 08:37, Ebert,Timothy Aaron <tebert using ufl.edu> wrote:
>
>> Reposting the data did not help. We do not like to guess, and doing so
>> takes a great deal of time that is likely wasted.
>> Rows are observations.
>> Columns are variables.
>> In Excel, the first row will be variable names and all subsequent rows
>> will be observations.
>>
>> Income is the first variable. It has seven states: $10, $25, $40, $75,
>> >$75, "No", "Answer"
>> MF is the second variable. It has six values: 1, 2, 3, 4, 5, 9
>> None is the third variable. It has seven values: 1, 3.05, 2.29, 2.24,
>> 1.71, 1.30, 2.83
>> Equity is the last variable with many states, both numeric and text. A
>> computer will read it all as text.
>>
>> As written the data cannot be analyzed.
>>
>> Equity looks like it should be numeric. However, it has text values:
>> "Debt", "Hybrid", "Bank", "AC", "None", "Current", "Savings", "No", and
>> "Answer"
>>
>> In looking at the data I try to find some organization where every
>> variable has the same number of rows as every other variable. I fail with
>> these data.
>> I could combine "No" and "Answer" into one name "No Answer" to make it
>> agree with MF, but then it does not work for None.
>>
>>
>> Please rework the data in Excel so that we can properly interpret the
>> content. If it is badly organized in Excel, moving it to R will not help.
>> Below, I tried adding carriage returns and spaces to organize the data,
>> but I have a column of numbers that are not identified. The values below
>> $10 do not make much sense compared to other values.
>>
>> I am tired of guessing.
>>
>> Tim
>>
>> -----Original Message-----
>> From: R-help <r-help-bounces using r-project.org> On Behalf Of Anupam Tyagi
>> Sent: Wednesday, June 28, 2023 11:49 PM
>> To: r-help using r-project.org
>> Subject: Re: [R] Plotting factors in graph panel
>>
>> [External Email]
>>
>> Thanks, Pikal and Jim. Yes, it has been a long time Jim. I hope you have
>> been well.
>>
>> Pikal, thanks. Your solution may be close to what I want. I did not know
>> that I was posting in HTML. I just copied the data from Excel and posted in
>> the email in Gmail. The data is still in Excel, because I have not yet
>> figured out what is a good way to organize it in R. I am posting it again
>> below as text. These are rows in Excel: 1,2,3,4,5,9 after MF are income
>> categories and No Answer category (9). Down the second column are
>> categories of MF and Bank AC. Rest of the columns are percentages.
>>
>> Jim, thanks for the graph. I am looking to plot only one line (category)
>> each in many small plots on the same page. I don't want to compare
>> different categories on the same graph as you do, but see how each category
>> varies by income, one category in each graph. Like Excel does with
>> Sparklines (Top menu: Insert, Sparklines, Lines). I have many categories
>> for many variables. I am only showing two MF and Bank AC.
>>
>> Income $10 $25 $40 $75 > $75 No Answer
>> MF 1 2 3 4 5 9
>> None 1 3.05 2.29 2.24 1.71 1.30
>> 2.83
>> Equity 2 29.76 28.79 29.51 28.90 31.67 36.77
>>
>> Debt 3 31.18 32.64 34.31 35.65 37.59 33.15
>>
>> Hybrid 4 36.00 36.27 33.94 33.74 29.44 27.25
>>
>> Bank AC None 1 46.54 54.01 59.1 62.17 67.67 60.87
>>
>> Current 2 24.75 24.4 25 24.61 24.02 21.09
>>
>> Savings 3 25.4 18.7 29 11.48 7.103 13.46
>>
>> No Answer 9 3.307 2.891 13.4 1.746 1.208 4.577
>>
>>
>> On Wed, 28 Jun 2023 at 17:30, Jim Lemon <drjimlemon using gmail.com> wrote:
>>
>> > Hi Anupam,
>> > Haven't heard from you in a long time. Perhaps you want something like
>> > this:
>> >
>> > at_df<-read.table(text=
>> > "Income MF MF_None MF_Equity MF_Debt MF_Hybrid Bank_None Bank_Current Bank_Savings Bank_NA
>> > $10 1 3.05 29.76 31.18 36.0 46.54 24.75 25.4 3.307
>> > $25 2 2.29 28.79 32.64 36.27 54.01 24.4 18.7 2.891
>> > $40 3 2.24 29.51 34.31 33.94 59.1 25.0 29 13.4
>> > $75 4 1.71 28.90 35.65 33.74 62.17 24.61 11.48 1.746
>> > >$75 5 1.30 31.67 37.59 29.44 67.67 24.02 7.103 1.208
>> > No_Answer 9 2.83 36.77 33.15 27.25 60.87 21.09 13.46 4.577",
>> > header=TRUE,stringsAsFactors=FALSE)
>> > at_df<-at_df[at_df$Income!="No_Answer",which(names(at_df)!="Bank_NA")]
>> > png("MF_Bank.png",height=600)
>> > par(mfrow=c(2,1))
>> > matplot(at_df[,c("MF_None","MF_Equity","MF_Debt","MF_Hybrid")],
>> > type="l",col=1:4,lty=1:4,lwd=3,
>> > main="Percentages by Income and MF type",
>> > xlab="Income",ylab="Percentage of group",xaxt="n")
>> > axis(1,at=1:5,labels=at_df$Income)
>> > legend(3,24,c("MF_None","MF_Equity","MF_Debt","MF_Hybrid"),
>> > lty=1:4,lwd=3,col=1:4)
>> > matplot(at_df[,c("Bank_None","Bank_Current","Bank_Savings")],
>> > type="l",col=1:3,lty=1:4,lwd=3,
>> > main="Percentages by Income and Bank type",
>> > xlab="Income",ylab="Percentage of group",xaxt="n")
>> > axis(1,at=1:5,labels=at_df$Income)
>> > legend(3,54,c("Bank_None","Bank_Current","Bank_Savings"),
>> > lty=1:4,lwd=3,col=1:3)
>> > dev.off()
>> >
>> > Jim
>> >
>> > On Wed, Jun 28, 2023 at 6:33 PM Anupam Tyagi <anuptyagi using gmail.com> wrote:
>> > >
>> > > Hello,
>> > >
>> > > I want to plot the following kind of data (percentage of respondents
>> > > from a survey) that varies by Income into many small *line* graphs in a
>> > > panel of graphs. I want to omit "No Answer" categories. I want to
>> > > see how each one of the categories (percentages), "None", "Equity",
>> > > etc. varies by Income.
>> > > How can I do this? How to organize the data well and how to plot? I
>> > > thought Lattice may be a good package to plot this, but I don't know for
>> > > sure. I prefer to do this in Base-R if possible, but I am open to
>> > > ggplot. Any ideas will be helpful.
>> > >
>> > > Income
>> > > $10 $25 $40 $75 > $75 No Answer
>> > > MF 1 2 3 4 5 9
>> > > None 1 3.05 2.29 2.24 1.71 1.30 2.83 Equity 2 29.76 28.79 29.51
>> > > 28.90 31.67 36.77 Debt 3 31.18 32.64 34.31 35.65 37.59 33.15 Hybrid
>> > > 4 36.00 36.27 33.94 33.74 29.44 27.25 Bank AC None 1 46.54 54.01
>> > > 59.1 62.17 67.67 60.87 Current 2 24.75 24.4 25 24.61 24.02 21.09
>> > > Savings 3 25.4 18.7 29 11.48 7.103 13.46 No Answer 9 3.307 2.891
>> > > 13.4 1.746 1.208 4.577
>> > >
>> > > Thanks.
>> > > --
>> > > Anupam.
>> > >
>> > > [[alternative HTML version deleted]]
>> > >
>> > > ______________________________________________
>> > > R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
>> > > https://stat.ethz.ch/mailman/listinfo/r-help
>> > > PLEASE do read the posting guide
>> > > http://www.R-project.org/posting-guide.html
>> > > and provide commented, minimal, self-contained, reproducible code.
>> >
>>
>>
>> --
>> Anupam.
>>
>> [[alternative HTML version deleted]]
>>
>> ______________________________________________
>> R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide
>> http://www.r-project.org/posting-guide.html
>> and provide commented, minimal, self-contained, reproducible code.
>> ______________________________________________
>> R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide
>> http://www.R-project.org/posting-guide.html
>> and provide commented, minimal, self-contained, reproducible code.
>>
>
>
> --
> John Kane
> Kingston ON Canada
>
--
Anupam.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: TrialData4.png
Type: image/png
Size: 9740 bytes
Desc: not available
URL: <https://stat.ethz.ch/pipermail/r-help/attachments/20230706/7e56f392/attachment.png>
More information about the R-help
mailing list