[R] R Help

Tue Mar 3 00:03:30 CET 2015

You would do better to ask for local help if you cannot reduce your code to a
minimal example that lets us easily understand what you are looking for.

On 02.03.2015 10:38, Rami Alzebdieh wrote:
> Dear Sir,
>
> I started using R 3 months ago, and I am still learning,

Same for me .... after more than 16 years.

Best,
Uwe Ligges

> I have a project in which I am using R. A friend helped me write the code
> for this project and it works perfectly, but I need to make a small change
> to it. It looks very simple, but for me it is very complicated. I have
> included the code below and I hope you can help me with this problem. I have
> marked exactly what I need to change. The project calculates the
> market-weighted and industry-weighted returns for each company, based on the
> date levels.
>
> sync = read.csv("country-14.csv",header=T)
> id.country = 14
>
> sync = sync[sync$country!="country" & sync$country==id.country,-c(2,5)]
> sync$price=as.numeric(as.character(sync$price))
> sync$mv=as.numeric(as.character(sync$mv))
> attach(sync)
>
> #### Calculate returns and add to the dataset
> n.comp = nlevels(as.factor(as.character(sync$company_name)))
> comp.names = levels(as.factor(as.character(sync$company_name)))
> data = vector("list",n.comp)
> for(i in 1:n.comp){
>    temp = sync[sync$company_name==comp.names[i],]
>    data[[i]] = cbind(temp,c(NA,diff(temp$price)/temp$price[1:(length(temp$price)-1)]))
> }
> sync = do.call(rbind,data)
> names(sync)[7] = "returns"
> detach(sync)
> attach(sync)
>
> #### Fill industry_code column
> industry_code=rep(NA,dim(sync)[1])
> for(i in 1:dim(sync)[1]){
>    if(nchar(as.character(company_code[i])) == 3){
>      industry_code[i] = as.numeric(substr(as.character(company_code[i]),1,1))
>    } else {
>      industry_code[i] = as.numeric(substr(as.character(company_code[i]),1,2))
>    }
>    print((i/dim(sync)[1])*100)
> }
> sync = cbind(sync,as.factor(industry_code))
> names(sync)[8] = "industry_code"
> detach(sync)
> attach(sync)
>
> #### Calculate market weighted returns and add to the dataset
> market_returns = rep(NA,dim(sync)[1])
> industry_returns = rep(NA,dim(sync)[1])
> for(i in 1:nlevels(date)){
>      data = sync[date==levels(date)[i],]
>      data$company_name = as.factor(as.character(data$company_name))
>      for(m in 1:nlevels(data$company_name)){
>        index1 = data$company_name == levels(data$company_name)[m]
>        index2 = date==levels(date)[i] & company_name==levels(data$company_name)[m]
>        market_returns[index2] = (sum(data$returns*(data$mv/sum(data$mv,na.rm=TRUE)),na.rm=TRUE) -
>          (data$returns[index1]*(data$mv[index1]/sum(data$mv,na.rm=TRUE))))/(nlevels(data$company_name)-1) ## this is what I need to change: instead of using the number of company levels in the dataset (nlevels(data$company_name)), I need to use the number of non-NA return values in data$returns (by the way, this code calculates returns at the date level, as you can see above)
>      }
>    print(i/nlevels(date))
> }
>
> sync = cbind(sync,market_returns)
> names(sync)[9] = "market_returns"
> detach(sync)
> attach(sync)
>
> #### Calculate industry weighted returns and add to the dataset
> for(i in 1:nlevels(date)){
>      for(k in 1:nlevels(as.factor(as.character(industry_code)))){
>        data1 = sync[date==levels(date)[i] & industry_code==levels(as.factor(as.character(industry_code)))[k],]
>        data1$company_name = as.factor(as.character(data1$company_name))
>        for(l in 1:nlevels(data1$company_name)){
>          index3 = data1$company_name == levels(data1$company_name)[l]
>          index4 = date==levels(date)[i] & company_name==levels(data1$company_name)[l]
>          industry_returns[index4] = (sum(data1$returns*(data1$mv/sum(data1$mv,na.rm=TRUE)),na.rm=TRUE) -
>            (data1$returns[index3]*(data1$mv[index3]/sum(data1$mv,na.rm=TRUE))))/(nlevels(data1$company_name)-1) ## I also need to change this: instead of using the number of company levels in the dataset (nlevels(data1$company_name)), I need to use the number of non-NA return values in data1$returns (by the way, this code calculates returns at the date level and industry level, as you can see above)
>
>        }
>      }
>    print(i/nlevels(date))
> }
>
> sync = cbind(sync,industry_returns)
> names(sync)[10] = "industry_returns"
> detach(sync)
> attach(sync)
>
> year = apply(as.matrix(sync$date),1,function(x) as.factor(substr(as.character(x),7,10)))
> sync = cbind(sync,as.factor(year))
> names(sync)[11] = "year"
> sync = sync[sync$year!="1999",]
> sync$year = as.factor(as.character(sync$year))
> detach(sync)
> attach(sync)
>
> year = as.factor(as.character(year))
> industry_code = as.factor(as.character(industry_code))
> comp.per.ind = rep(NA, dim(sync)[1])
> for(i in 1:nlevels(year)){
>    for(j in 1:nlevels(industry_code)){
>      index = year==levels(year)[i] & industry_code==levels(industry_code)[j]
>      data = sync[index,]
>      comp.per.ind[index] = nlevels(as.factor(as.character(data$company_name)))
>    }
> }
>
> sync = cbind(sync,as.factor(comp.per.ind))
> names(sync)[12] = "comp.per.ind"
> detach(sync)
> attach(sync)
>
> write.csv(sync,paste("Returns_data",id.country,".csv",sep=""))
>
>
>
>
> Thank you for your help
>
> Rami Alzebdieh
>
>
>
> 	[[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>