##--------------------------------------------------------------## ## ## ##--------------------------------------------------------------## library(ggplot2) library(mgcv) library(fields) setwd('C:/temp/') nbr<-read.csv('nbr_data.csv',header=TRUE) valdat<-read.csv("validationdata_toR.csv",header=TRUE) nbr<-nbr head(nbr) head(nbr) names(nbr) ##Calculates Mean Occupancy Probability for a site and 95% CI spp.all<-names(nbr)[6:17] #pulls the names of the species of interest from the dataframe for(z in 1:length(spp.all)){#Loops through the species fmla1<-as.formula(paste(spp.all[z]," ~ 1"))#creats a model formula with the paste command (Intercept only for an overall mean) fit<-glm(fmla1,family=binomial,data=nbr)#Fits the GLM model print(spp.all[z]) #Prints species name print(plogis(confint(fit,type='link'))) #Prints the confidence interval (on probability scale) } ##--------------------------------------------------------------## ##Making Initial Plots to Look For Patterns (Marginal Responses)## ##--------------------------------------------------------------## bin.dat<-stats.bin(y=nbr$PORE,x=nbr$slope,N=15) emp.probs<-data.frame(PORE=bin.dat$stats[2,],slope=bin.dat$centers,N=bin.dat$stats[1,]) ggplot(nbr,aes(y=PORE,x=slope))+geom_point(alpha=.01)+ stat_smooth(method='gam',formula=y~s(x,k=4,bs='tp'),family=binomial)+ geom_point(data=emp.probs)+geom_text(data=emp.probs,aes(label=N,size=1,vjust=1))+ theme_bw()+scale_x_continuous('Slope')+scale_y_continuous('PORE Probability of Occurrence')+ theme(legend.position='') bin.dat<-stats.bin(y=nbr$PORE,x=nbr$sinasp,N=15) emp.probs<-data.frame(PORE=bin.dat$stats[2,],sinasp=bin.dat$centers,N=bin.dat$stats[1,]) ggplot(nbr,aes(y=PORE,x=sinasp))+geom_point(alpha=.01)+ stat_smooth(method='gam',formula=y~s(x,k=4,bs='tp'),family='binomial')+ geom_point(data=emp.probs)+geom_text(data=emp.probs,aes(label=N,size=1,vjust=1))+ theme_bw()+scale_x_continuous('SinAsp')+scale_y_continuous('PORE Probability of Occurrence')+ theme(legend.position='') bin.dat<-stats.bin(y=nbr$PORE,x=nbr$ndvi,N=15) emp.probs<-data.frame(PORE=bin.dat$stats[2,],ndvi=bin.dat$centers,N=bin.dat$stats[1,]) ggplot(nbr,aes(y=PORE,x=ndvi))+geom_point(alpha=.01)+ stat_smooth(method='gam',formula=y~s(x,k=4,bs='tp'),family='binomial')+ geom_point(data=emp.probs)+geom_text(data=emp.probs,aes(label=N,size=1,vjust=1))+ theme_bw()+scale_x_continuous('NDVI')+scale_y_continuous('PORE Probability of Occurrence')+ theme(legend.position='') #*****************************************************************************# #*********Model Validation****************************************************# #*****************************************************************************# #This section of code automates model validation and spits transect level cross #validation AUC values #install.packages("PresenceAbsence") library(ROCR) library(ggplot2) library(PresenceAbsence) #function to calculate AUC for model fit vars.all<-c(c(names(nbr)[18:23],names(nbr)[27:32]),"sinasp*cosasp")#vector of variable names pulled from the data frame or added interactions manually spp.all<-c("PORE","LIDA","CYOF","CIVU","CIAR","CANU","CEMA","HYPE")#Species of interest (must match column names in dataframe) spp.num<-c(6,7,12,13,14,15,16,17)#these numbers correspond to the column number of the associated species (e.g. PORE is column 6 of the dataframe - make sure order of these matches order of spp.all) #Species loop for(z in 1:length(spp.all)){ #Matrices to store data within the loop store.data<-matrix(0,50,7)#For linear GLM store.data2<-matrix(0,50,7)#For BIC linear GLM store.data3<-matrix(0,50,7)#For poly GLM store.data4<-matrix(0,50,7)#For BIC poly GLM spp<-spp.num[z] #this assigns the variable spp the value of the column with the species presence absence data clust.ids<-unique(nbr$REFFID)#This pulls out the unique names of the transects or clusters #50 cross validation replicates for(rep in 1:50){ transect.choose<-seq(0,35,1)#This creates a vector with the transect numbers (note: transects should be numbered numerically from 0 to n in the data frame) ran.o<-sample(transect.choose,36,replace=FALSE)#Creates a random order of the transects (36 in the case of the NBR data) #using that random transect IDs this creates a training set of data and then a test set #We start by doing a subset of the whole dataframe with the first transect from the random order #then we append the other transects until we reach 75% of the transects for the training data (26 transects for this data set) #The rest are put into the training dataset in the same fashion (transects 27 through 35 for these data) training.data<-subset(nbr,clust.ids==ran.o[1])#Subset dataframe with first transect in random order for(i in 2:26){#append the rest of the transects in the training data (26 is equal to 75% of the transects) training.data<-rbind(training.data,subset(nbr,clust.ids==ran.o[i])) } #Create another data frame with the rest of the transects for the test data test.data<-subset(nbr,clust.ids==ran.o[27]) for(i in 28:35){ test.data<-rbind(test.data,subset(nbr,clust.ids==ran.o[i])) } #These two lines of code generate the model formula based on the species and variables as defined above xnam <- paste(vars.all, sep="") fmla1 <- as.formula(paste(spp.all[z]," ~ ", paste(xnam, collapse= "+"))) #Fit the glm with just the training data fit<-glm(fmla1,family=binomial(link="logit"),data=training.data) #Generate predictions (fitted values) for the test data and pull observations from the real data fitted.vals<-predict(fit,newdata=test.data,type="response");obs<-test.data[,spp] #Calculate AUC using ROCR package to.rocr<-prediction(fitted.vals,obs) auc<-performance(to.rocr,'auc')@y.values[[1]] #Store the AUCs and calculated thresholds - these are calculated from 50 reps with random transects in the test and training data sets store.data[rep,1]<-spp.all[z];store.data[rep,2]<-auc #Though these lines of code are commented out, they do a BIC based model selection on the linear GLM and calculate the AUC for that model. #BIC selection (the argument "k=log(nrow(training.data))" is what makes it BIC selection instead of AIC selection #fit<-step(fit,k=log(nrow(training.data)),trace=0)#does stepwiseBIC model selection #fitted.vals<-predict(fit,newdata=test.data,type="response");obs<-test.data[,spp] #to.rocr<-prediction(fitted.vals,obs) #auc<-performance(to.rocr,'auc')@y.values[[1]] #store.data2[rep,1]<-spp.all[z];store.data2[rep,2]<-auc #Fit a model that allows a second order polynomial for each variable and calculate the AUC of that model fmla2<-as.formula(paste(spp.all[z]," ~ poly(Dist_fence,2) + poly(Dist_rd,2) + poly(FlowAcc,2) + poly(SolarRad,2) + poly(cosasp,2) + poly(sinasp,2) + poly(plancurv,2) + poly(procurv,2) + poly(curv,2) + poly(slope,2) + poly(evi,2) + poly(ndmi,2) + poly(ndvi,2) + poly(elevation,2) + poly(sinasp * cosasp,2)")) #Fit the polynomial model fit<-glm(fmla2,family=binomial(link="logit"),data=training.data) fitted.vals<-predict(fit,newdata=test.data,type="response");obs<-test.data[,spp] to.rocr<-prediction(fitted.vals,obs) auc<-performance(to.rocr,'auc')@y.values[[1]] #Store AUC and binary threshold data store.data3[rep,1]<-spp.all[z];store.data3[rep,2]<-auc #BIC selection on the polynomial model fit<-step(fit,k=log(nrow(training.data)),trace=0)#does stepwiseBIC model selection fitted.vals<-predict(fit,newdata=test.data,type="response");obs<-test.data[,spp] to.rocr<-prediction(fitted.vals,obs) auc<-performance(to.rocr,'auc')@y.values[[1]] #Store data store.data4[rep,1]<-spp.all[z];store.data4[rep,2]<-auc print(c(spp.all[z],rep)) #Lets you know what rep the code is on and for what species }#end rep loop #plot(store.data[,2],store.data[,4],main=spp.all[z],ylab="AUC",xlab="N") #Generates a dataframe of the above data so that some plots can be generated (As the code is now, only the linear, polynomical, and BIC selected polynomial are being recorded) ind2<-c(rep("linear",50),rep("poly",50),rep("poly_bic",50))#Creates an indicator variable so we know what model (50 of each as we did 50 reps) unified<-rbind(store.data,store.data3,store.data4)#Combines the data matrices AUC<-as.numeric(unified[,2]) mode(ind2)<-"character" mode(AUC)<-"numeric" unified2<-data.frame(cbind(ind2,AUC))#Combine variables colnames(unified2)<-c("model","AUC","seqsp.tst","maxssp.tst","seqsp.trn","maxssp.trn","n10th")#Name columns ## write a table with all the data for the species that is currently being modeled write.table(unified2,paste(spp.all[z],"unified_temp",sep=" "))#Save data table }#end spp loop #------------------------------------------------------------------------------------------------# #--------------End Model Validation Code - Plot the Results Below -------------------------------# #------------------------------------------------------------------------------------------------# #After all the species are run, this generates a grand data table with the AUC data spp.all<-c("PORE","LIDA","CYOF","CIAR","CANU","CEMA","HYPE") #Within the species loop, read in the output data tables for each species and append (rbind) them together unified<-read.table(paste(spp.all[1],"unified_temp.txt",sep=" "),header=TRUE) for(i in 2:length(spp.all)){ tmp<-read.table(paste(spp.all[i],"unified_temp.txt",sep=" "),header=TRUE) unified<-rbind(unified,tmp) } #Creates a species indicator variable spp.ind<-rep("PORE",150) for(i in 2:length(spp.all)){ spp.ind<-c(spp.ind,rep(spp.all[i],150)) } unified<-cbind(unified,spp.ind)#Bind together the species indicator variable and the data #model2<-ifelse(unified$model=='additive','linear','poly') #unified<-cbind(unified,model2) #Generate a boxplot of AUC for the different models and species library(ggplot2) ggplot(unified,aes(x=factor(model),AUC)) + geom_boxplot() + facet_wrap(~spp.ind,nrow=1) + opts(title="National Bison Range Model Validation")+ opts(strip.text.x = theme_text(size = 10, angle = 90)) + scale_x_discrete(name='Model Type') #Saves the plot to the current directory ggsave("Model validation for NBR.wmf",dpi=600) #----------------------------------------------------------------------------------------------# #----Do not procede below here - Material is for next lesson on making maps--------------------# #----------------------------------------------------------------------------------------------# ##Now that we have our best attempt at unbiased estimates of binary calssification threshold and AUCs ##We fit the models with all the data for the purposes of map calculation. This script prints out the coefficients for the #Unselected model and the BIC selected model for each species within a loop. vars.all<-c(c(names(nbr)[18:23],names(nbr)[27:32]),"sinasp*cosasp") spp.all<-c("PORE","LIDA","CYOF","CIVU","CIAR","CANU","CEMA","HYPE") spp.num<-c(6,7,12,13,14,15,16,17) #Species loop for(z in 1:length(spp.all)){ xnam <- paste(vars.all, sep="") fmla1 <- as.formula(paste(spp.all[z]," ~ ", paste(xnam, collapse= "+"))) fit<-glm(fmla1,family=binomial(link="logit"),data=training.data) fitted.vals<-predict(fit,newdata=nbr,type="response");obs<-nbr[,spp] auc<-colAUC(fitted.vals,obs,plotROC=FALSE) fit.reduced<-step(fit,k=log(nrow(training.data)),trace=0)#does stepwiseBIC model selection print(spp.all[z]) print(coef(fit)) print(coef(fit.reduced)) } #If you are curious, using this package and the function VIF you can easily calcualte the #Variance inflation factors for your covariates. This is a rough test of multicollinearity #If the VIFs are greater than 10 (rule of thumb that is not well respected anymore) it #Generally means there is strong collinearity between two of your variables #This is generally not an issure for prediction but can be for interpretation #install.packages('DAAG') library(DAAG) vif(fit) #This does the same thing described above but for the polynomial model. ##Models with all data vars.all<-c(c(names(nbr)[18:23],names(nbr)[27:32]),"sinasp*cosasp") spp.all<-c("PORE","LIDA","CYOF","CIVU","CIAR","CANU","CEMA","HYPE") spp.num<-c(6,7,12,13,14,15,16,17) #Species loop for(z in 1:length(spp.all)){ spp<-spp.num[z] xnam <- paste(vars.all, sep="") fmla1<-as.formula(paste(spp.all[z]," ~ Dist_fence + I(Dist_fence ^ 2) + Dist_rd + I(Dist_rd ^ 2) + FlowAcc + I(FlowAcc ^ 2) + SolarRad + I(SolarRad ^ 2) + cosasp + I(cosasp ^ 2) + sinasp + I(sinasp ^ 2) + curv + I(curv ^ 2) + slope + I(slope ^ 2) + evi + I(evi ^ 2) + ndmi + I(ndmi ^ 2) + ndvi + I(ndvi ^ 2) + elevation + I(elevation ^ 2) + sinasp * cosasp + I(sinasp * cosasp ^ 2)")) fit<-glm(fmla1,family=binomial(link="logit"),data=training.data) fitted.vals<-predict(fit,newdata=nbr,type="response");obs<-nbr[,spp] auc<-colAUC(fitted.vals,obs,plotROC=FALSE) fit.reduced<-step(fit,k=log(nrow(training.data)),trace=0)#does stepwiseBIC model selection print(spp.all[z]) print(auc) print(coef(fit.reduced)) } #Variance inflation factors again #install.packages('DAAG') library(DAAG) vif(fit) #************************************************************# #************Hierarchical Variance Partitioning**************# #************************************************************# #What variables are most important to the species distribution? install.packages('hier.part') library(hier.part) library(gtools) #Hierarchical partitioning can only work for the lienar model (without polynomials) #Also, it only work when the # of independent variables is less than 12 #The function requires a dataframe of the independent variables and that is what is created below. vars.all<-c("Dist_fence","Dist_rd","FlowAcc","SolarRad","cosasp","sinasp","slope","evi","ndmi","ndvi","elevation","sinasp*cosasp") vars.hp<-data.frame(cbind(nbr$Dist_fence,nbr$Dist_rd,nbr$FlowAcc,nbr$SolarRad,nbr$cosasp,nbr$sinasp,nbr$slope,nbr$evi,nbr$ndmi,nbr$ndvi,nbr$elevation,nbr$sinasp*nbr$cosasp)) #These lines carry out the HP for each species. The arguments indicate the data are binomial and to use the log(Likelihood) to calculate the #Joint and independent contribution of each variable. It also automatically outputs a barplot of the independent contribution of each variable #The y= argument just pulls the PA for the species of interest hp.pore<-hier.part(y=nbr$PORE,xcan=vars.hp, fam="binomial",gof="logLik",barplot=TRUE) hp.lida<-hier.part(y=nbr$LIDA,xcan=vars.hp, fam="binomial",gof="logLik",barplot=TRUE) hp.cyof<-hier.part(y=nbr$CYOF,xcan=vars.hp, fam="binomial",gof="logLik",barplot=TRUE) hp.ciar<-hier.part(y=nbr$CIAR,xcan=vars.hp, fam="binomial",gof="logLik",barplot=TRUE) hp.canu<-hier.part(y=nbr$CANU,xcan=vars.hp, fam="binomial",gof="logLik",barplot=TRUE) hp.cema<-hier.part(y=nbr$CEMA,xcan=vars.hp, fam="binomial",gof="logLik",barplot=TRUE) hp.hype<-hier.part(y=nbr$HYPE,xcan=vars.hp, fam="binomial",gof="logLik",barplot=TRUE) #Though this below code is not tested, it should generate a data frame of the percent independent contribution and #the percent joint contribution of each variable to each species that can then be plotted #This code shows how it is done for PORE - substitute in the appropriate places to do for other species. total.I<-sum(hp.pore$IJ[,1])#Calculates Total Independent Variation total.J<-sum(hp.pore$IJ[,2])#Calculates Total Joint Variation pI<-(hp.pore$IJ[,1]/total.I)*100#Turns into percent independent contribution pJ<-(hp.pore$IJ[,2]/total.J)*100#Turns into percent joint contribution spID<-rep("PORE",2*ncol(vars.hp))#Creats variable for Species Type<-c(rep("Independent",ncol(vars.hp)),rep("Joint",ncol(vars.hp)))#Creates variable for type of variation Variable<-rep(vars.all,2)#Creats variable for the actual variable #Create a data frame hp.dat<-cbind(c(pI,pJ),spID,Type,Variable) colnames(hp.dat)<-c("pVar","spID","Type","Variable") hp.dat2<-data.frame(hp.dat) write.table(hp.dat2,"hp_dat_pore.txt") hp.dat2<-read.table("hp_dat_pore.txt",header=TRUE) #Plot results ggplot(hp.dat2,aes(x=Variable,y=pVar,fill=Type)) + geom_bar(position=position_dodge(),colour='black') + theme_bw() + opts(axis.text.x=theme_text(angle=-90)) + scale_fill_manual(values=c("#FFFFFF", "#000000")) + opts(title="Percent Variation Explained by Independent Variables") + scale_y_continuous("Percent Variation Explained") #****************************************************************************# #*******************To Make Raster Calculator Equations**********************# #****************************************************************************# #This bit of code is perhaps overly complex but it generates appropriately formatted equations to be input into the #ArcView 9.x raster calculator to make PO maps ##This function matches up the raster names in ArcView with the names the variable are given in the R Data frame #This is a long vector of the names of the rasters with the appropriate brackets around them as they are seen in ArcView #Note the "Pow(,2)" variables are the second order polynomials of the existing variables. This is an internal function in the raster calculator #Make sure you run the rast.names code and the whole pull.varname function before proceding rast.names<-c("[distfen_nbr4]","[distrd_nbr3]","[FlowAccum]","[solar_rad_nbr]", "[cosasp_nbr]","[sinasp_nbr]","[Slope_nbr]","[evi_nbr]","[ndmi_nbr]","[ndvi_nbr]","[DEM_UTM]", "[cosasp_nbr] * [sinasp_nbr]","Pow([distfen_nbr4],2)","Pow([distrd_nbr3],2)","Pow([FlowAccum],2)","Pow([solar_rad_nbr],2)", "Pow([cosasp_nbr],2)","Pow([sinasp_nbr],2)","Pow([Slope_nbr],2)","Pow([evi_nbr],2)","Pow([ndmi_nbr],2)","Pow([ndvi_nbr],2)","Pow([DEM_UTM],2)", "Pow([cosasp_nbr],2) * Pow([sinasp_nbr],2)") #this is a function that just uses if else statement to match up the raster names to the variable names in the model #for example: in the R data, Distanec to Fence is called "Dist_fence" and that corresponds to the raster "[distfen_nbr4]" which is the first item in the rast.names vector pull.varname<-function(varb){ if(varb=="Dist_fence"){ var.name<-rast.names[1] } else if(varb=="Dist_rd"){ var.name<-rast.names[2] } else if(varb=="FlowAcc"){ var.name<-rast.names[3] } else if(varb=="SolarRad"){ var.name<-rast.names[4] } else if(varb=="cosasp"){ var.name<-rast.names[5] } else if(varb=="sinasp"){ var.name<-rast.names[6] } else if(varb=="slope"){ var.name<-rast.names[7] } else if(varb=="evi"){ var.name<-rast.names[8] } else if(varb=="ndmi"){ var.name<-rast.names[9] } else if(varb=="ndvi"){ var.name<-rast.names[10] } else if(varb=="elevation"){ var.name<-rast.names[11] } else if(varb=="cosasp:sinasp"){ var.name<-rast.names[12] } else if(varb=="I(Dist_fence^2)"){ var.name<-rast.names[13] } else if(varb=="I(Dist_rd^2)"){ var.name<-rast.names[14] } else if(varb=="I(FlowAcc^2)"){ var.name<-rast.names[15] } else if(varb=="I(SolarRad^2)"){ var.name<-rast.names[16] } else if(varb=="I(cosasp^2)"){ var.name<-rast.names[17] } else if(varb=="I(sinasp^2)"){ var.name<-rast.names[18] } else if(varb=="I(slope^2)"){ var.name<-rast.names[19] } else if(varb=="I(evi^2)"){ var.name<-rast.names[20] } else if(varb=="I(ndmi^2)"){ var.name<-rast.names[21] } else if(varb=="I(ndvi^2)"){ var.name<-rast.names[22] } else if(varb=="I(elevation^2)"){ var.name<-rast.names[23] } else if(varb=="I(sinasp * cosasp^2)"){ var.name<-rast.names[24] } }#end function pull.varname ####Next Bit of Code: This fits the desired model, pulls the coefficients from the model and generates the raster equation based on the raster names you defined above ##This code loops through all the species and prints out the raster calculator equations as it goes. spp.all<-c("PORE","LIDA","CYOF","CIVU","CIAR","CANU","CEMA","HYPE") vars.all<-c(c(names(nbr)[18:23],names(nbr)[28:32]),"sinasp*cosasp") for(z in 1:length(spp.all)){ xnam <- paste(vars.all, sep="") fmla1 <- as.formula(paste(spp.all[z]," ~ ", paste(xnam, collapse= "+"))) fit<-glm(fmla1,family=binomial,data=nbr) polys<-paste("I(",vars.all,"^2)", sep="") xnam2 <- paste(c(vars.all,polys), sep="") fmla2 <- as.formula(paste(spp.all[z]," ~ ", paste(xnam2, collapse= "+"))) fit2<-glm(fmla2,family=binomial,data=nbr) #Linear variables<-names(coef(fit)) form.num<-paste("(exp(",coef(fit)[1]," + ",sep="") form.den<-paste("(1 + exp(",coef(fit)[1]," + ",sep="") for(i in 2:length(variables)){ var.name<-pull.varname(variables[i]) co.val<-coef(fit)[variables[i]] ifelse(i==max(length(variables)),form.num<-paste(form.num,co.val," * ",var.name," ))",sep=""),form.num<-paste(form.num,co.val," * ",var.name," + ",sep="")) ifelse(i==max(length(variables)),form.den<-paste(form.den,co.val," * ",var.name," ))",sep=""),form.den<-paste(form.den,co.val," * ",var.name," + ",sep="")) } print(c(spp.all[z],"Linear Model")) print(paste(form.num," / ",form.den)) #Poly variables<-names(coef(fit2)) form.num<-paste("(exp(",coef(fit2)[1]," + ",sep="") form.den<-paste("(1 + exp(",coef(fit2)[1]," + ",sep="") for(i in 3:length(variables)){ var.name<-pull.varname(variables[i]) co.val<-coef(fit2)[variables[i]] ifelse(i==max(length(variables)),form.num<-paste(form.num,co.val," * ",var.name," ))",sep=""),form.num<-paste(form.num,co.val," * ",var.name," + ",sep="")) ifelse(i==max(length(variables)),form.den<-paste(form.den,co.val," * ",var.name," ))",sep=""),form.den<-paste(form.den,co.val," * ",var.name," + ",sep="")) } print(c(spp.all[z],"Polynomial Model")) print(paste(form.num," / ",form.den)) }#End Species Loop ##Raster Calculator Formula #paste(form.num," / ",form.den) #**************************************************************************# #***********************Making Partial Regression Plots********************# #**************************************************************************# #This code generates single variable regressions for all the species spp.all<-c("PORE","LIDA","CYOF","CIVU","CIAR","CANU","CEMA","HYPE")#Define your species vars.all<-c(c(names(nbr)[18:23],names(nbr)[28:32]),"sinasp*cosasp")#Define the variables you are interested in #This code fits a single variable regression and saves the figures for each species as it goes. #Look in the working directory after the code is done to see the images. library(ggplot2) for(z in 1:length(spp.all)){ i=1 xnam <- paste(vars.all, sep="") fmla1<-as.formula(paste(spp.all[z]," ~ ",vars.all[i])) fmla2<-as.formula(paste(spp.all[z]," ~ ",vars.all[i]," + I(",vars.all[i],"^2)")) fit<-glm(fmla1,family=binomial(link="logit"),data=nbr) fit.poly<-glm(fmla2,family=binomial(link="logit"),data=nbr) vars<-c(18:23,28:32) rng.var<-range(nbr[,vars[i]]) new.x<-seq(rng.var[1],rng.var[2],(rng.var[2]-rng.var[1])/1000) new.x<-data.frame(new.x) colnames(new.x)<-vars.all[i] po<-predict(fit,newdata<-new.x,type='response') po.poly<-predict(fit.poly,newdata<-new.x,type='response') store.update<-cbind(po,po.poly,new.x,rep(vars.all[i],1001)) colnames(store.update)<-c("PO","PO.poly","X.Value","Variable") for(i in 2:(length(vars.all)-1)){ xnam <- paste(vars.all, sep="") fmla1<-as.formula(paste(spp.all[z]," ~ ",vars.all[i])) fmla2<-as.formula(paste(spp.all[z]," ~ ",vars.all[i]," + I(",vars.all[i],"^2)")) fit<-glm(fmla1,family=binomial(link="logit"),data=nbr) fit.poly<-glm(fmla2,family=binomial(link="logit"),data=nbr) vars<-c(18:23,28:32) rng.var<-range(nbr[,vars[i]]) new.x<-seq(rng.var[1],rng.var[2],(rng.var[2]-rng.var[1])/1000) new.x<-data.frame(new.x) colnames(new.x)<-vars.all[i] po<-predict(fit,newdata<-new.x,type='response') po.poly<-predict(fit.poly,newdata<-new.x,type='response') store.dat<-cbind(po,po.poly,new.x,rep(vars.all[i],1001)) colnames(store.dat)<-c("PO","PO.poly","X.Value","Variable") store.update<-rbind(store.update,store.dat) } write.table(store.update,paste(spp.all[z]," Single Variable PO.txt")) ggplot(store.update,aes(x=X.Value,y=PO)) + geom_line() + facet_wrap(~Variable,scale='free_x') + scale_x_continuous(name='X Value for Each Variable') + scale_y_continuous(name='Probability of Occurrence') + opts(title=paste(spp.all[z]," Single Variable Regression Plots - Linear Model")) ggsave(paste(spp.all[z]," single variable regresssions - linear.pdf"),dpi=300,height=8,width=11) ggplot(store.update,aes(x=X.Value,y=PO.poly)) + geom_line() + facet_wrap(~Variable,scale='free_x') + scale_x_continuous(name='X Value for Each Variable') + scale_y_continuous(name='Probability of Occurrence') + opts(title=paste(spp.all[z]," Single Variable Regression Plots - Polynomial Model")) ggsave(paste(spp.all[z]," single variable regresssions - poly.pdf"),dpi=300,height=8,width=11) }