# Load one season of position-player data from four Excel workbooks
# (standard batting, standard fielding, catcher fielding, appearances) and
# clean the batting, fielding and catcher tables into the column layout the
# rest of the script indexes by position.
library(readxl)
library(dplyr)

# Season being processed, the quantile used later as the per-position
# baseline, and the number of team games in the season.
yearSet <- 2022
baselineLevel <- .25
yearGames <- 162

# The workbooks live in a per-season folder and carry the season in their
# file names, so both are derived from yearSet instead of being repeated.
seasonDir <- file.path("~/Documents/Baseball Stats/Player Value", yearSet)
seasonWorkbook <- function(prefix) {
  file.path(seasonDir, paste0(prefix, "_", yearSet, ".xlsx"))
}

# Remove the named columns from a data frame; names that are absent are
# skipped, exactly as `df$name <- NULL` would.
dropColumns <- function(df, cols) {
  for (col in cols) {
    df[[col]] <- NULL
  }
  df
}

Batting <- read_excel(seasonWorkbook("StanBatting"))
Fielding <- read_excel(seasonWorkbook("StanFielding"))
CatchFielding <- read_excel(seasonWorkbook("CatchBaseFielding"))
Appearances <- read_excel(seasonWorkbook("Appearances"))

# ---- Batting ----
# Drop the rank column and the rate/summary columns; the rates are
# recomputed later from the counting stats.
Batting <- dropColumns(
  Batting,
  c("Rk", "OPS+", "Age", "TB", "PA", "BA", "OBP", "SLG", "OPS")
)
Batting$yearID <- yearSet
# Rows whose player id is the '-9999' placeholder are discarded.
Batting <- Batting[Batting$`Name-additional` != "-9999", ]
Batting <- rename(
  Batting,
  playerID = `Name-additional`,
  teamID = Tm,
  lgID = Lg,
  Doub = `2B`,
  Trip = `3B`
)
# Positional reorder; depends on the exact column layout left by the steps
# above (yearID and playerID are columns 22 and 23 at this point).
Batting <- Batting[, c(22, 23, 2:3, 1, 4:15, 20, 17, 18:19, 16)]
Batting$TeamGames <- yearGames

# Return the last n characters of each string in x. Kept defined because
# later parts of the script may still call it.
substrRight <- function(x, n) {
  substr(x, nchar(x) - n + 1, nchar(x))
}

# Remove a trailing "*" and then a trailing "#" from player names (same
# order as before, so "Name#*" becomes "Name" and "Name*#" becomes "Name*").
Batting$Name <- sub("\\*$", "", Batting$Name)
Batting$Name <- sub("#$", "", Batting$Name)

# ---- Fielding ----
Fielding <- dropColumns(
  Fielding,
  c("Rk", "Rtot", "Rdrs", "Rdrs/yr", "Rgood", "Rtot/yr", "Age")
)
# Use the season constant rather than a literal so the tables stay joinable
# on yearID when yearSet changes.
Fielding$yearID <- yearSet
Fielding <- rename(
  Fielding,
  playerID = `Name-additional`,
  lgID_field = Lg,
  teamID_field = Tm,
  G_field = G
)
Fielding$POS <- Fielding$`Pos Summary`
Fielding <- dropColumns(Fielding, c("Pos Summary", "CG"))

# ---- Catcher fielding ----
CatchFielding$Rk <- NULL
CatchFielding <- rename(
  CatchFielding,
  playerID = `Name-additional`,
  teamID_field = Tm,
  SB_catch = SB,
  CS_catch = CS
)
CatchFielding <- dropColumns(CatchFielding, c("Age", "SBO"))
# Positional drop of columns 7 to 20; depends on the workbook layout after
# the removals above.
CatchFielding <- CatchFielding[, -c(7:20)]

# Attach the catcher columns to the matching fielding rows, then drop the
# fielding rate columns that are recomputed later.
Fielding <- left_join(
  Fielding,
  CatchFielding,
  by = c("playerID", "teamID_field", "Name")
)
Fielding <- dropColumns(Fielding, c("Ch", "Fld%", "RF/9", "RF/G"))
# ---- Finish the fielding table ----
Fielding$Name <- NULL
# Positional reorder; depends on the 16-column layout at this point.
Fielding <- Fielding[, c(10:11, 1:2, 12, 3:9, 13:16)]

# ---- Appearances ----
Appearances$Rk <- NULL
Appearances$Age <- NULL
Appearances$Yrs <- NULL
Appearances$GS <- NULL
Appearances$Batting <- NULL
Appearances$Defense <- NULL
Appearances <- rename(
  Appearances,
  playerID = `Name-additional`,
  teamID_App = Tm,
  G_all = G,
  G_p = P,
  G_c = C,
  G_1b = `1B`,
  G_2b = `2B`,
  G_3b = `3B`,
  G_ss = SS,
  G_lf = LF,
  G_cf = CF,
  G_rf = RF,
  G_dh = DH,
  G_of = OF,
  G_pr = PR,
  G_ph = PH
)
Appearances$yearID <- yearSet
Appearances$lgID_App <- "DELETE LATER"
Appearances <- Appearances[, c(17:18, 2, 19, 1, 3:16)]
Appearances$Name <- NULL

# ---- Merge batting, fielding and appearances ----
total_step <- left_join(Batting, Fielding, by = c("yearID", "playerID"))
total <- left_join(total_step, Appearances, by = c("yearID", "playerID"))

# resolve team, league, and position NAs
total[is.na(total$teamID_field), "teamID_field"] <-
  total[is.na(total$teamID_field), "teamID"]
total[is.na(total$lgID_field), "lgID_field"] <-
  total[is.na(total$lgID_field), "lgID"]
total[is.na(total$POS), "POS"] <- "DH/PH/PR"
total[is.na(total$teamID_App), "teamID_App"] <-
  total[is.na(total$teamID_App), "teamID"]

# create proportion of games played at each position columns
# (columns are referenced explicitly; attach() is no longer used, so no
# stale copies of `total` are left on the search path)
total$G_tot <- total$G_p + total$G_c + total$G_1b + total$G_2b + total$G_3b +
  total$G_ss + total$G_lf + total$G_cf + total$G_rf + total$G_dh
# New column name -> games column it is based on. The order matters: later
# code addresses these columns by position.
propSources <- c(
  PropPitch = "G_p",
  PropCatch = "G_c",
  PropFirst = "G_1b",
  PropSecond = "G_2b",
  PropThird = "G_3b",
  PropShort = "G_ss",
  PropLeft = "G_lf",
  PropCenter = "G_cf",
  PropRight = "G_rf",
  PropOutfield = "G_of",
  PropDH = "G_dh"
)
for (propName in names(propSources)) {
  total[[propName]] <- total[[propSources[[propName]]]] / total$G_tot
}

# create columns for other values
total$Sing <- total$H - total$Doub - total$Trip - total$HR
total$PA <- total$AB + total$BB + total$HBP + total$SF + total$SH
total$uBB <- total$BB - total$IBB
total$Inn2 <- total$Inn
total$Ch <- total$PO + total$A + total$E
total$otherOut <- total$AB - total$H - total$SO - total$GDP
total$SBpercent <- total$SB / (total$SB + total$CS)
total$BaseVA <- (.15 * total$SB - .39 * total$CS) / (total$SB + total$CS)
total$CSpercent <- total$CS_catch / (total$CS_catch + total$SB_catch)
total$BA <- total$H / total$AB
total$OBP <- (total$H + total$BB + total$HBP) /
  (total$AB + total$BB + total$HBP + total$SF)
total$SLG <- (1 * total$Sing + 2 * total$Doub + 3 * total$Trip +
  4 * total$HR) / total$AB
total$OPS <- total$OBP + total$SLG
total$BatVA <- (.22 * total$uBB + .17 * total$IBB + .24 * total$HBP +
  .30 * total$Sing + .58 * total$Doub + .91 * total$Trip + 1.29 * total$HR -
  .10 * total$SF - .34 * total$SO - .33 * total$otherOut - .22 * total$SH -
  .75 * total$GDP) /
  (total$AB + total$BB + total$HBP + total$SF + total$SH)
total$FieldPercent <- (total$PO + total$A) / total$Ch
# NOTE(review): if PB / SB_catch / CS_catch are NA for rows without catcher
# data, both branches below are NA and FieldVA ends up 0 after the NA
# replacement further down - confirm that this is intended.
fieldRunsPerInn <- (.33 * total$PO + .33 * total$A + .10 * total$DP -
  .26 * total$PB - .68 * total$E) / (total$Inn)
total$FieldVA <- ifelse(
  total$SB_catch > 0 & total$CS_catch > 0,
  fieldRunsPerInn +
    (.39 * total$CS_catch - .15 * total$SB_catch) /
    (total$CS_catch + total$SB_catch),
  fieldRunsPerInn
)
total$RFG <- (total$PO + total$A) / (total$G_tot - total$G_dh)
total$RF9 <- 9 * (total$PO + total$A) / total$Inn

# set NA values to 0
total[is.na(total)] <- 0
# snapshot of the table before the columns are reordered
temp <- total

# reorder columns (each step depends on the layout left by the previous one)
# move up singles, 66th column, to be after hits and before doubles
total <- total[, c(1:9, 66, 10:65, 67:83)]
# move up PAs, now 67th column, to be before at bats
total <- total[, c(1:6, 67, 7:66, 68:83)]
# move up uBBs, now 68th column, to be after walks
total <- total[, c(1:18, 68, 19:67, 69:83)]
# move up innings, now 69th column, to be before inning outs
total <- total[, c(1:31, 69, 32:68, 70:83)]
# move up defensive chances, now column 70, to be after inning outs and
# before putouts
total <- total[, c(1:33, 70, 34:69, 71:83)]
# move up otherOuts, now column 71, to be after GIDP
total <- total[, c(1:25, 71, 26:70, 72:83)]
# move up stolen base percentage and Baserunning Value Average, now columns
# 72 and 73, to be after SB and CS
total <- total[, c(1:17, 72:73, 18:71, 74:83)]
# move up catcher caught stealing percentage, now column 74, to be after
# catcher SB and CS
total <- total[, c(1:45, 74, 46:73, 75:83)]
# move up batting rate stats, now columns 75 to 79, to be after batting
# count stats
total <- total[, c(1:28, 75:79, 29:74, 80:83)]
# move up fielding rate stats, now columns 80 to 83, to be after fielding
# count stats
total <- total[, c(1:51, 80:83, 52:79)]

# load in wOBA weights for each season to calculate wOBA
library(readxl)
wOBAweights <- read_excel(
  "~/Documents/Baseball Stats/Player Value/Yearly_wOBA_weights.xlsx"
)
wOBAweights$yearID <- wOBAweights$Season
wOBAweights$Season <- NULL
wOBAweights$wOBA <- NULL
wOBAweights$wOBAScale <- NULL
total <- left_join(total, wOBAweights, by = "yearID")
total$wOBA <- (total$wBB * total$uBB + total$wHBP * total$HBP +
  total$w1B * total$Sing + total$w2B * total$Doub + total$w3B * total$Trip +
  total$wHR * total$HR) /
  (total$AB + total$BB - total$IBB + total$SF + total$HBP)

# move wOBA column and get rid of now unneeded wOBA weight columns
total$wBB <- NULL
total$wHBP <- NULL
total$w1B <- NULL
total$w2B <- NULL
total$w3B <- NULL
total$wHR <- NULL
total$runSB <- NULL
total$runCS <- NULL
total$cFIP <- NULL
total <- total[, c(1:32, 84, 33:83)]

# round rate values (column -> number of digits)
roundDigits <- c(
  BA = 3, OBP = 3, SLG = 3, OPS = 3, wOBA = 3,
  BatVA = 4, SBpercent = 4, BaseVA = 4, CSpercent = 4,
  FieldPercent = 3, FieldVA = 4, RFG = 2, RF9 = 2,
  PropPitch = 4, PropCatch = 4, PropFirst = 4, PropSecond = 4,
  PropThird = 4, PropShort = 4, PropLeft = 4, PropCenter = 4,
  PropRight = 4, PropOutfield = 4, PropDH = 4
)
for (rateCol in names(roundDigits)) {
  total[[rateCol]] <- round(total[[rateCol]], digits = roundDigits[[rateCol]])
}

# add name columns; both are plain copies of the full Name column and are
# removed again before the final files are written
final <- total
final$nameFirst <- final$Name
final$nameLast <- final$Name
# put names at front
final <- final[, c(1, 86, 85, 2:84)]

# output file (season folder derived from yearSet)
write.csv(
  final,
  file.path(
    "~/Documents/Baseball Stats/Player Value", yearSet, "PosRawCombData.csv"
  )
)

# filter to players in the yearSet season and add columns for relative 1Q
# values
batters <- filter(final, yearID == yearSet)

# add baserunner SB attempts and catcher CS opps and reorder columns and
# remove NAs
batters$SB_att <- batters$SB + batters$CS
batters$CSopps <- batters$CS_catch + batters$SB_catch
batters <- batters[, c(1:19, 87, 20:53, 88, 54:86)]
batters[is.na(batters)] <- 0

# get player per PA values and per Chance/Opportunity Values
# (these become columns 89 to 118, in this order)
batters$ABperPA <- batters$AB / batters$PA
batters$RperPA <- batters$R / batters$PA
batters$HperPA <- batters$H / batters$PA
batters$SingPerPA <- batters$Sing / batters$PA
batters$DoubPerPA <- batters$Doub / batters$PA
batters$TripPerPA <- batters$Trip / batters$PA
batters$HRperPA <- batters$HR / batters$PA
batters$RBIperPA <- batters$RBI / batters$PA
batters$SBperAtt <- batters$SB / batters$SB_att
batters$CSperAtt <- batters$CS / batters$SB_att
batters$SB_attPerPA <- batters$SB_att / batters$PA
batters$BBperPA <- batters$BB / batters$PA
batters$uBBperPA <- batters$uBB / batters$PA
batters$SOperPA <- batters$SO / batters$PA
batters$IBBperPA <- batters$IBB / batters$PA
batters$HBPperPA <- batters$HBP / batters$PA
batters$SHperPA <- batters$SH / batters$PA
batters$SFperPA <- batters$SF / batters$PA
batters$GIDPperPA <- batters$GDP / batters$PA
batters$otherOutPerPA <- batters$otherOut / batters$PA
batters$ChperInn <- batters$Ch / batters$Inn
batters$POperInn <- batters$PO / (batters$Inn)
batters$AperInn <- batters$A / (batters$Inn)
batters$EperInn <- batters$E / (batters$Inn)
batters$DPperInn <- batters$DP / (batters$Inn)
batters$PBperInn <- batters$PB / (batters$Inn)
batters$WPperInn <- batters$WP / (batters$Inn)
batters$SB_catchPerOpp <- batters$SB_catch / (batters$CSopps)
batters$CS_catchPerOpp <- batters$CS_catch / (batters$CSopps)
batters$CSoppsPerInn <- batters$CSopps / batters$Inn

# get rid of NaN values due to dividing by 0 PAs or Innings
batters[is.na(batters)] <- 0

# convert batters to a dataframe
batters <- as.data.frame(batters)

# get baseline (baselineLevel quantile, i.e. 1st quartile at .25) values for
# each position in the yearSet season
# A position group holds the players with enough games and innings at the
# position (10% of the season for pitchers, 45% otherwise) who played at
# least 70% of their games there; DH has no innings requirement.
seasonGames <- mean(batters$TeamGames)
pitcher <- filter(
  batters, yearID == yearSet, G_p >= seasonGames * .1,
  Inn >= seasonGames * 9 * .1, PropPitch >= .7
)
catcher <- filter(
  batters, yearID == yearSet, G_c >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropCatch >= .7
)
firstBase <- filter(
  batters, yearID == yearSet, G_1b >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropFirst >= .7
)
secondBase <- filter(
  batters, yearID == yearSet, G_2b >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropSecond >= .7
)
thirdBase <- filter(
  batters, yearID == yearSet, G_3b >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropThird >= .7
)
shortStop <- filter(
  batters, yearID == yearSet, G_ss >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropShort >= .7
)
leftField <- filter(
  batters, yearID == yearSet, G_lf >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropLeft >= .7
)
centerField <- filter(
  batters, yearID == yearSet, G_cf >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropCenter >= .7
)
rightField <- filter(
  batters, yearID == yearSet, G_rf >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropRight >= .7
)
outfield <- filter(
  batters, yearID == yearSet, G_of >= seasonGames * .45,
  Inn >= seasonGames * 9 * .45, PropOutfield >= .7
)
designatedHitter <- filter(
  batters, yearID == yearSet, G_dh >= seasonGames * .45, PropDH >= .7
)

# One row per position group (in the order of `positionGroups`), one column
# per stat column of batters listed in `quartileCols`.
quartileCols <- c(8:38, 42:60, 63:118)
positionGroups <- list(
  pitcher, catcher, firstBase, secondBase, thirdBase, shortStop,
  leftField, centerField, rightField, outfield, designatedHitter
)
quartiles <- batters[0, quartileCols]
for (p in seq_along(positionGroups)) {
  for (j in seq_along(quartileCols)) {
    quartiles[p, j] <-
      quantile(positionGroups[[p]][, quartileCols[j]], baselineLevel)[[1]]
  }
}
quartiles$yearID <- yearSet
quartiles$POS <- c("P", "C", "1B", "2B", "3B", "SS", "LF", "CF", "RF", "OF", "DH")
quartiles <- quartiles[, c(108, 107, 1:106)]

# add columns for relative 1Q values (columns 119 to 148, in this order)
baselineRateCols <- c(
  "ABperPA1Q", "RperPA1Q", "HperPA1Q", "SingPerPA1Q", "DoubPerPA1Q",
  "TripPerPA1Q", "HRperPA1Q", "RBIperPA1Q", "SBperAtt1Q", "CSperAtt1Q",
  "SBattPerPA1Q", "BBperPA1Q", "uBBperPA1Q", "SOperPA1Q", "IBBperPA1Q",
  "HBPperPA1Q", "SHperPA1Q", "SFperPA1Q", "GIDPperPA1Q", "otherOutPerPA1Q",
  "ChperInn1Q", "POperInn1Q", "AperInn1Q", "EperInn1Q", "DPperInn1Q",
  "PBperInn1Q", "WPperInn1Q", "SB_catchPerOpp1Q", "CS_catchPerOpp1Q",
  "CSoppsPerInn1Q"
)
for (baselineCol in baselineRateCols) {
  batters[[baselineCol]] <- 1
}

# Position-weighted baseline for every player at once.
#   players      data frame holding the position-share columns
#   q            11 baseline values ordered P, C, 1B, 2B, 3B, SS, LF, CF,
#                RF, OF, DH (the OF value is never used)
#   positionCols the nine share columns for P through RF (indices or names)
#   dhCol        the DH share column (index or name)
#   includeDH    TRUE: weight by the raw shares and add the DH term;
#                FALSE: divide each share by (1 - DH share) and omit DH
# Terms are added left to right, P first, so results match the former
# cell-by-cell computation.
positionWeighted1Q <- function(players, q, positionCols, dhCol, includeDH) {
  dhShare <- players[[dhCol]]
  shareAt <- function(p) {
    share <- players[[positionCols[p]]]
    if (includeDH) share else share / (1 - dhShare)
  }
  baseline <- q[1] * shareAt(1)
  for (p in 2:9) {
    baseline <- baseline + q[p] * shareAt(p)
  }
  if (includeDH) {
    baseline <- baseline + q[11] * dhShare
  }
  baseline
}

# find each player's first quartile values based on their position
# distributions
# factor in % of time at DH for batting stats (columns 119 to 138), but not
# fielding stats (columns 139 to 148)
# thus must adjust fielding position %s by DH time (find relative
# frequencies)
# batters columns 78 to 86 are the P..RF shares and column 88 is the DH
# share; baseline column k takes its values from quartiles column k - 40.
for (k in 119:138) {
  batters[[k]] <- positionWeighted1Q(
    batters, quartiles[[k - 40]], 78:86, 88, includeDH = TRUE
  )
}
for (k in 139:148) {
  batters[[k]] <- positionWeighted1Q(
    batters, quartiles[[k - 40]], 78:86, 88, includeDH = FALSE
  )
}

# find each player's quartile rate values
# (placeholders; the values are filled in by the code that follows)
batters$SBpercent1Q <- 1
batters$BaseVA1Q <- 1
batters$BA1Q <- 1
batters$OBP1Q <- 1
batters$SLG1Q <- 1
batters$OPS1Q <- 1
batters$wOBA1Q <- 1
batters$BatVA1Q <- 1
batters$CSpercent1Q <- 1
batters$FieldPercent1Q <- 1
# Position-weighted baseline for every player at once.
#   players      data frame holding the position-share columns
#   q            11 baseline values ordered P, C, 1B, 2B, 3B, SS, LF, CF,
#                RF, OF, DH (the OF value is never used)
#   positionCols the nine share columns for P through RF (indices or names)
#   dhCol        the DH share column (index or name)
#   includeDH    TRUE: weight by the raw shares and add the DH term;
#                FALSE: divide each share by (1 - DH share) and omit DH
# Terms are added left to right, P first, so results match the former
# cell-by-cell computation.
positionWeighted1Q <- function(players, q, positionCols, dhCol, includeDH) {
  dhShare <- players[[dhCol]]
  shareAt <- function(p) {
    share <- players[[positionCols[p]]]
    if (includeDH) share else share / (1 - dhShare)
  }
  baseline <- q[1] * shareAt(1)
  for (p in 2:9) {
    baseline <- baseline + q[p] * shareAt(p)
  }
  if (includeDH) {
    baseline <- baseline + q[11] * dhShare
  }
  baseline
}

# remaining rate-stat baseline columns (they become columns 159 to 161)
batters$FieldVA1Q <- 1
batters$RFG1Q <- 1
batters$RF91Q <- 1

# Fill the rate-stat baselines for all players column by column. Batting and
# baserunning rates include DH time; fielding rates use position shares
# relative to non-DH time.
positionShareCols <- c(
  "PropPitch", "PropCatch", "PropFirst", "PropSecond", "PropThird",
  "PropShort", "PropLeft", "PropCenter", "PropRight"
)
battingRateStats <- c(
  "SBpercent", "BaseVA", "BA", "OBP", "SLG", "OPS", "wOBA", "BatVA"
)
fieldingRateStats <- c("CSpercent", "FieldPercent", "FieldVA", "RFG", "RF9")
for (rateStat in battingRateStats) {
  batters[[paste0(rateStat, "1Q")]] <- positionWeighted1Q(
    batters, quartiles[[rateStat]], positionShareCols, "PropDH",
    includeDH = TRUE
  )
}
for (rateStat in fieldingRateStats) {
  batters[[paste0(rateStat, "1Q")]] <- positionWeighted1Q(
    batters, quartiles[[rateStat]], positionShareCols, "PropDH",
    includeDH = FALSE
  )
}

# reorder the quartile rate values to be by the other rate values
batters <- batters[, c(
  1:21, 149, 22, 150, 23:32, 151, 33, 152, 34, 153, 35, 154, 36, 155, 37,
  156, 38:56, 157, 57, 158, 58, 159, 59, 160, 60, 161, 61:148
)]

# get rid of NaN values due to dividing by 0 PAs or Innings
batters[is.na(batters)] <- 0

# get player per PA or Inn values above/below the 1st quartile
batters$ABabovePerPA <- batters$ABperPA - batters$ABperPA1Q
batters$RabovePerPA <- batters$RperPA - batters$RperPA1Q
batters$HabovePerPA <- batters$HperPA - batters$HperPA1Q
batters$SingAbovePerPA <- batters$SingPerPA - batters$SingPerPA1Q
batters$DoubAbovePerPA <- batters$DoubPerPA - batters$DoubPerPA1Q
batters$TripAbovePerPA <- batters$TripPerPA - batters$TripPerPA1Q
batters$HRabovePerPA <- batters$HRperPA - batters$HRperPA1Q
batters$RBIabovePerPA <- batters$RBIperPA - batters$RBIperPA1Q
batters$SBabovePerAtt <- batters$SBperAtt - batters$SBperAtt1Q
batters$CSabovePerAtt <- batters$CSperAtt - batters$CSperAtt1Q
batters$SBattAbovePerPA <- batters$SB_attPerPA - batters$SBattPerPA1Q
batters$BBabovePerPA <- batters$BBperPA - batters$BBperPA1Q
batters$uBBabovePerPA <- batters$uBBperPA - batters$uBBperPA1Q
batters$SOabovePerPA <- batters$SOperPA - batters$SOperPA1Q
batters$IBBabovePerPA <- batters$IBBperPA - batters$IBBperPA1Q
batters$HBPabovePerPA <- batters$HBPperPA - batters$HBPperPA1Q
batters$SHabovePerPA <- batters$SHperPA - batters$SHperPA1Q
batters$SFabovePerPA <- batters$SFperPA - batters$SFperPA1Q
batters$GIDPabovePerPA <- batters$GIDPperPA - batters$GIDPperPA1Q
batters$otherOutAbovePerPA <- batters$otherOutPerPA - batters$otherOutPerPA1Q
batters$ChAbovePerInn <- batters$ChperInn - batters$ChperInn1Q
batters$POabovePerInn <- batters$POperInn - batters$POperInn1Q
batters$AabovePerInn <- batters$AperInn - batters$AperInn1Q
batters$EabovePerInn <- batters$EperInn - batters$EperInn1Q
batters$DPabovePerInn <- batters$DPperInn - batters$DPperInn1Q
batters$PBabovePerInn <- batters$PBperInn - batters$PBperInn1Q
batters$WPabovePerInn <- batters$WPperInn - batters$WPperInn1Q
batters$SB_catchAbovePerOpp <- batters$SB_catchPerOpp - batters$SB_catchPerOpp1Q
batters$CS_catchAbovePerOpp <- batters$CS_catchPerOpp - batters$CS_catchPerOpp1Q
batters$CSoppsAbovePerInn <- batters$CSoppsPerInn - batters$CSoppsPerInn1Q

# get above/below values based on the # of player PAs or Ch Inns
batters$ABabove <- batters$ABabovePerPA * batters$PA
batters$Rabove <- batters$RabovePerPA * batters$PA
batters$Habove <- batters$HabovePerPA * batters$PA
batters$SingAbove <- batters$SingAbovePerPA * batters$PA
batters$DoubAbove <- batters$DoubAbovePerPA * batters$PA
batters$TripAbove <- batters$TripAbovePerPA * batters$PA
batters$HRabove <- batters$HRabovePerPA * batters$PA
batters$RBIabove <- batters$RBIabovePerPA * batters$PA
batters$SBabove <- batters$SBabovePerAtt * batters$SB_att
batters$CSabove <- batters$CSabovePerAtt * batters$SB_att
batters$SBattAbove <- batters$SBattAbovePerPA * batters$PA
batters$BBabove <- batters$BBabovePerPA * batters$PA
batters$uBBabove <- batters$uBBabovePerPA * batters$PA
batters$SOabove <- batters$SOabovePerPA * batters$PA
batters$IBBabove <- batters$IBBabovePerPA * batters$PA
batters$HBPabove <- batters$HBPabovePerPA * batters$PA
batters$SHabove <- batters$SHabovePerPA * batters$PA
batters$SFabove <- batters$SFabovePerPA * batters$PA
batters$GIDPabove <- batters$GIDPabovePerPA * batters$PA
batters$otherOutAbove <- batters$otherOutAbovePerPA * batters$PA
batters$ChAbove <- batters$ChAbovePerInn * batters$Inn
batters$POabove <- batters$POabovePerInn * batters$Inn
batters$Aabove <- batters$AabovePerInn * batters$Inn
batters$Eabove <- batters$EabovePerInn * batters$Inn
batters$DPabove <- batters$DPabovePerInn * batters$Inn
batters$PBabove <- batters$PBabovePerInn * batters$Inn
batters$WPabove <- batters$WPabovePerInn * batters$Inn
batters$SB_catchAbove <- batters$SB_catchAbovePerOpp * batters$CSopps
batters$CS_catchAbove <- batters$CS_catchAbovePerOpp * batters$CSopps
batters$CSoppsAbove <- batters$CSoppsAbovePerInn * batters$Inn

# adjusted putouts and assists for difficulty
# estimate that 59% of putouts are unassisted, and 41% are assisted
# estimate that on assisted putouts, the assist is worth 80% of the out and
# the putout is worth 20%
# only dock C and 1B for assisted (or SO) putouts, since they get the most
# must dock by % of time at first, but must also use relative frequency
# (don't include DH)
# divide into 1B or C (relative to DH) piece with unassisted/nonstrikeout
# POs, 1B or C (relative to DH) piece with assisted/strikeout POs, and
# non-1B or C (relative to DH) piece
# NOTE(review): the splits actually applied below are 90/10 for 1B and 93/7
# for C, and catcher strikeout putouts are weighted .33 * .33, which does not
# match the 59/41 and 80/20 figures quoted above - confirm which is intended.
batters$Adj1BAssistPOs <- batters$POabove *
  (batters$PropFirst / (1 - batters$PropDH)) * .9
batters$Adj1BUnassistPOs <- batters$POabove *
  (batters$PropFirst / (1 - batters$PropDH)) * .1
batters$AdjCstrikeoutPOs <- batters$POabove *
  (batters$PropCatch / (1 - batters$PropDH)) * .93
batters$AdjCnonstrikeoutPOs <- batters$POabove *
  (batters$PropCatch / (1 - batters$PropDH)) * .07
batters$AdjNon1BorC_POs <- batters$POabove *
  (1 - ((batters$PropFirst + batters$PropCatch) / (1 - batters$PropDH)))

# determine total values based on run value weights
# round weights to 2 decimals
batters$BattingValue <- with(
  batters,
  1.29 * HRabove + .91 * TripAbove + .58 * DoubAbove + .30 * SingAbove +
    .22 * uBBabove + .17 * IBBabove + .24 * HBPabove - .22 * SHabove -
    .1 * SFabove - .75 * GIDPabove - .34 * SOabove - .33 * otherOutAbove
)
batters$BaserunningValue <- with(batters, .15 * SBabove - .39 * CSabove)
batters$FieldingValue <- with(
  batters,
  .33 * AdjNon1BorC_POs + .33 * Adj1BUnassistPOs + .33 * .2 * Adj1BAssistPOs +
    .33 * AdjCnonstrikeoutPOs + .33 * .33 * AdjCstrikeoutPOs +
    .33 * .8 * Aabove - .68 * Eabove - .26 * PBabove + .39 * CS_catchAbove -
    .15 * SB_catchAbove + .1 * DPabove
)
batters[is.na(batters)] <- 0

# adjust Fielding Value for pitchers/Ohtani
# don't care about fielding for DH, pitcher fielding measured in other file
batters$FieldingValue <- ifelse(
  (batters$PropPitch / (1 - batters$PropDH)) >= .5,
  0,
  batters$FieldingValue
)

# sum up player value for position players
batters$TotalValue <- batters$BattingValue + batters$BaserunningValue +
  batters$FieldingValue

# reorder columns and get rid of NAs
batters <- batters[, c(1:7, 227:230, 8:226)]
batters[is.na(batters)] <- 0

# create files for Excel
# Drop the duplicated name columns and the placeholder league column before
# writing (the pitcher group is neither cleaned nor written).
stripHelperCols <- function(df) {
  df$nameLast <- NULL
  df$nameFirst <- NULL
  df$lgID_App <- NULL
  df
}
batters <- stripHelperCols(batters)
catcher <- stripHelperCols(catcher)
firstBase <- stripHelperCols(firstBase)
secondBase <- stripHelperCols(secondBase)
thirdBase <- stripHelperCols(thirdBase)
shortStop <- stripHelperCols(shortStop)
leftField <- stripHelperCols(leftField)
centerField <- stripHelperCols(centerField)
rightField <- stripHelperCols(rightField)
outfield <- stripHelperCols(outfield)
designatedHitter <- stripHelperCols(designatedHitter)

# Output files go to the season folder and are prefixed with the season.
seasonOutput <- function(suffix) {
  file.path(
    "~/Documents/Baseball Stats/Player Value", yearSet,
    paste0(yearSet, suffix, ".csv")
  )
}
write.csv(batters, seasonOutput("PlayerValues"))
write.csv(quartiles, seasonOutput("quartiles"))
write.csv(catcher, seasonOutput("catchers"))
write.csv(firstBase, seasonOutput("firstbasemen"))
write.csv(secondBase, seasonOutput("secondbasemen"))
write.csv(thirdBase, seasonOutput("thirdbasemen"))
write.csv(shortStop, seasonOutput("shortstops"))
write.csv(leftField, seasonOutput("leftfielders"))
write.csv(centerField, seasonOutput("centerfielders"))
write.csv(rightField, seasonOutput("rightfielders"))
write.csv(outfield, seasonOutput("outfielders"))
write.csv(designatedHitter, seasonOutput("dhs"))

# group players by their primary position
# A player is assigned a position when at least half of his games were
# there; rules are applied in this order, so on a tie the later one wins.
batters$MainPOS <- "Util"
mainPosRules <- c(
  PropPitch = "P", PropCatch = "C", PropFirst = "1B", PropSecond = "2B",
  PropThird = "3B", PropShort = "SS", PropLeft = "LF", PropCenter = "CF",
  PropRight = "RF", PropDH = "DH"
)
for (shareCol in names(mainPosRules)) {
  batters$MainPOS[batters[[shareCol]] >= .5] <- mainPosRules[[shareCol]]
}
batters$MainPOS <- as.factor(batters$MainPOS)

# plot total value by position
library(ggplot2)
ggplot(data = batters, mapping = aes(x = MainPOS, y = TotalValue)) +
  geom_boxplot() +
  labs(x = "Position", y = "Total Value")

# get total value by position
batters %>%
  group_by(MainPOS) %>%
  summarise(
    Min = min(TotalValue),
    FirstQuart = quantile(TotalValue, .25),
    Mean = mean(TotalValue),
    Median = median(TotalValue),
    ThirdQuart = quantile(TotalValue, .75),
    Max = max(TotalValue),
    Freq = n()
  )

# plot batting value by position
ggplot(data = batters, mapping = aes(x = MainPOS, y = BattingValue)) +
  geom_boxplot() +
  labs(x = "Position", y = "Batting Value")

# get batting value by position
batters %>%
  group_by(MainPOS) %>%
  summarise(
    Min = min(BattingValue),
    FirstQuart = quantile(BattingValue, .25),
    Mean = mean(BattingValue),
    Median = median(BattingValue),
    ThirdQuart = quantile(BattingValue, .75),
    Max = max(BattingValue),
    Freq = n()
  )

# plot baserunning value by position
ggplot(data = batters, mapping = aes(x = MainPOS, y = BaserunningValue)) +
  geom_boxplot() +
  labs(x = "Position", y = "Baserunning Value")

# get baserunning value by position
batters %>%
  group_by(MainPOS) %>%
  summarise(
    Min = min(BaserunningValue),
    FirstQuart = quantile(BaserunningValue, .25),
    Mean = mean(BaserunningValue),
    Median = median(BaserunningValue),
    ThirdQuart = quantile(BaserunningValue, .75),
    Max = max(BaserunningValue),
    Freq = n()
  )

# plot fielding value by position
ggplot(data = batters, mapping = aes(x = MainPOS, y = FieldingValue)) +
  geom_boxplot() +
  labs(x = "Position", y = "Fielding Value")

# get fielding value by position
batters %>%
  group_by(MainPOS) %>%
  summarise(
    Min = min(FieldingValue),
    FirstQuart = quantile(FieldingValue, .25),
    Mean = mean(FieldingValue),
    Median = median(FieldingValue),
    ThirdQuart = quantile(FieldingValue, .75),
    Max = max(FieldingValue),
    Freq = n()
  )

# aggregate teams
# Rows whose team is 'TOT' are excluded from the per-team views, as are
# players whose main position is pitcher.
batters$oneTeam <- batters$teamID != "TOT"
oneTeam <- filter(batters, oneTeam == TRUE, MainPOS != "P")
oneTeam$teamID <- as.factor(oneTeam$teamID)

# plot total value by team
ggplot(data = oneTeam, mapping = aes(x = teamID, y = TotalValue)) +
  geom_boxplot() +
  labs(x = "Team", y = "Total Value")

# get total values by team
oneTeam <- as.data.frame(oneTeam)
library(dplyr)
oneTeam %>%
  group_by(teamID) %>%
  summarise(
    Means = mean(TotalValue),
    Medians = median(TotalValue),
    Freq = n()
  ) %>%
  print(n = 30)

# plot batting value by team
ggplot(data = oneTeam, mapping = aes(x = teamID, y = BattingValue)) +
  geom_boxplot() +
  labs(x = "Team", y = "Batting Value")

# get batting values by team
oneTeam %>%
  group_by(teamID) %>%
  summarise(
    Means = mean(BattingValue),
    Medians = median(BattingValue),
    Freq = n()
  ) %>%
  print(n = 30)

# plot baserunning value by team
ggplot(data = oneTeam, mapping = aes(x = teamID, y = BaserunningValue)) +
  geom_boxplot() +
  labs(x = "Team", y = "Baserunning Value")

# get baserunning values by team
oneTeam %>%
  group_by(teamID) %>%
  summarise(
    Means = mean(BaserunningValue),
    Medians = median(BaserunningValue),
    Freq = n()
  ) %>%
  print(n = 30)

# plot fielding value by team
ggplot(data = oneTeam, mapping = aes(x = teamID, y = FieldingValue)) +
  geom_boxplot() +
  labs(x = "Team", y = "Fielding Value")

# get fielding values by team
oneTeam %>%
  group_by(teamID) %>%
  summarise(
    Min = min(FieldingValue),
    FirstQuart = quantile(FieldingValue, .25),
    Mean = mean(FieldingValue),
    Median = median(FieldingValue),
    ThirdQuart = quantile(FieldingValue, .75),
    Max = max(FieldingValue),
    Total = sum(FieldingValue),
    Freq = n()
  ) %>%
  print(n = 30)

# MLB by position
# Pitchers are dropped from batters from here on. The diagonal reference
# lines have slope -1, so each marks a constant TotalValue: 0, and the 95th,
# 99th, 5th and 1st percentiles.
batters <- batters[batters$MainPOS != "P", ]
ggplot(
  batters,
  aes(
    x = FieldingValue, y = BattingValue + BaserunningValue,
    color = factor(MainPOS)
  )
) +
  geom_point() +
  geom_abline(intercept = 0, slope = -1) +
  geom_abline(
    intercept = quantile(batters$TotalValue, .95), slope = -1, color = "green"
  ) +
  geom_abline(
    intercept = quantile(batters$TotalValue, .99), slope = -1,
    color = "darkgreen"
  ) +
  geom_abline(
    intercept = quantile(batters$TotalValue, .05), slope = -1, color = "red"
  ) +
  geom_abline(
    intercept = quantile(batters$TotalValue, .01), slope = -1,
    color = "darkred"
  ) +
  labs(title = "Player Value By Position, MLB") +
  coord_cartesian(xlim = c(-20, 50), ylim = c(-30, 80))

# MLB by team
ggplot(
  oneTeam,
  aes(
    x = FieldingValue, y = BattingValue + BaserunningValue,
    color = factor(teamID)
  )
) +
  geom_point() +
  geom_abline(intercept = 0, slope = -1) +
  geom_abline(
    intercept = quantile(batters$TotalValue, .95), slope = -1, color = "green"
  ) +
  geom_abline(
    intercept = quantile(batters$TotalValue, .99), slope = -1,
    color = "darkgreen"
  ) +
  geom_abline(
    intercept = quantile(batters$TotalValue, .05), slope = -1, color = "red"
  ) +
  geom_abline(
    intercept = quantile(batters$TotalValue, .01), slope = -1,
    color = "darkred"
  ) +
  labs(title = "Player Value By Team, MLB") +
  coord_cartesian(xlim = c(-20, 50), ylim = c(-30, 80))

AL <- filter(oneTeam, lgID == "AL")
NL <- filter(oneTeam, lgID == "NL")

# AL by position
ggplot(
  AL,
  aes(
    x = FieldingValue, y = BattingValue + BaserunningValue,
    color = factor(MainPOS)
  )
) +
  geom_point() +
  geom_abline(intercept = 0, slope = -1) +
  geom_abline(
    intercept = quantile(AL$TotalValue, .95), slope = -1, color = "green"
  ) +
geom_abline(intercept=quantile(AL$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(AL$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(AL$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Position, AL")+ coord_cartesian(xlim=c(-20,50),ylim=c(-30,80)) #AL by team ggplot(AL, aes(x = FieldingValue, y = BattingValue+BaserunningValue, color = factor(teamID))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(AL$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(AL$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(AL$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(AL$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Team, AL")+ coord_cartesian(xlim=c(-20,50),ylim=c(-30,80)) #NL by position ggplot(NL, aes(x = FieldingValue, y = BattingValue+BaserunningValue, color = factor(MainPOS))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(NL$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(NL$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(NL$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(NL$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Position, NL")+ coord_cartesian(xlim=c(-20,50),ylim=c(-30,80)) #NL by team ggplot(NL, aes(x = FieldingValue, y = BattingValue+BaserunningValue, color = factor(teamID))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(NL$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(NL$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(NL$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(NL$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Team, NL")+ coord_cartesian(xlim=c(-20,50),ylim=c(-30,80)) 
# Quick console checks on the spread of total value in batters.
# Each expression stays at top level so it auto-prints when run interactively.
summary(batters[["TotalValue"]])

# NOTE(review): presumably 9 lineup spots x 30 teams, kept as a reference
# count to compare against -- confirm intent.
9 * 30

# Share of players whose total value reaches each threshold (5, 10, ..., 50).
with(batters, sum(TotalValue >= 5) / length(TotalValue))
with(batters, sum(TotalValue >= 10) / length(TotalValue))
with(batters, sum(TotalValue >= 15) / length(TotalValue))
with(batters, sum(TotalValue >= 20) / length(TotalValue))
with(batters, sum(TotalValue >= 25) / length(TotalValue))
with(batters, sum(TotalValue >= 30) / length(TotalValue))
with(batters, sum(TotalValue >= 35) / length(TotalValue))
with(batters, sum(TotalValue >= 40) / length(TotalValue))
with(batters, sum(TotalValue >= 45) / length(TotalValue))
with(batters, sum(TotalValue >= 50) / length(TotalValue))

# 99th and 1st percentiles of total value.
quantile(batters[["TotalValue"]], probs = .99)
quantile(batters[["TotalValue"]], probs = .01)