library(readxl) library(dplyr) library(ggplot2) yearSet <- 2022 baselineLevel <- .25 yearGames <- 162 Batting <- read_excel("~/Documents/Baseball Stats/Player Value/2022/StanBatting_2022.xlsx") Fielding <- read_excel("~/Documents/Baseball Stats/Player Value/2022/StanFielding_2022.xlsx") CatchFielding <- read_excel("~/Documents/Baseball Stats/Player Value/2022/CatchBaseFielding_2022.xlsx") Appearances <- read_excel("~/Documents/Baseball Stats/Player Value/2022/Appearances_2022.xlsx") Pitching <- read_excel("~/Documents/Baseball Stats/Player Value/2022/StanPitching_2022.xlsx") PitchingBA <- read_excel("~/Documents/Baseball Stats/Player Value/2022/Pitching_BatAgainst_2022.xlsx") #format Pitching data to match Lahman code #remove league average row Pitching = Pitching[Pitching$`Name-additional`!='-9999',] #remove * and # from names substrRight <- function(x, n){ substr(x, nchar(x)-n+1, nchar(x)) } Pitching$Name = ifelse(substrRight(Pitching$Name,1)=="*",substr(Pitching$Name,1,nchar(Pitching$Name)-1),Pitching$Name) Pitching$Name = ifelse(substrRight(Pitching$Name,1)=="#",substr(Pitching$Name,1,nchar(Pitching$Name)-1),Pitching$Name) PitchingBA$Name = ifelse(substrRight(PitchingBA$Name,1)=="*",substr(PitchingBA$Name,1,nchar(PitchingBA$Name)-1),PitchingBA$Name) PitchingBA$Name = ifelse(substrRight(PitchingBA$Name,1)=="#",substr(PitchingBA$Name,1,nchar(PitchingBA$Name)-1),PitchingBA$Name) Pitching$Rk <- NULL Pitching <- rename(Pitching,playerID=`Name-additional`,teamID=Tm,lgID=Lg,BFP=BF) Pitching$ERA <- NULL Pitching$`ERA+`<- NULL Pitching$FIP<- NULL Pitching$Age<-NULL Pitching$WHIP<-NULL Pitching$H9<-NULL Pitching$HR9<-NULL Pitching$BB9<-NULL Pitching$SO9<-NULL Pitching$`SO/W`<-NULL Pitching$`W-L%`<-NULL PitchingBA$Rk <- NULL PitchingBA <- rename(PitchingBA,playerID=`Name-additional`,teamID=Tm,GIDP=GDP) PitchingBA$Age<-NULL PitchingBA[,c(3:22)]<-NULL PitchingBA$HBP<-NULL PitchingBA$ROE<-NULL PitchingBA$IBB<-NULL Pitching = left_join(Pitching,PitchingBA,by=c("Name","teamID","playerID")) Pitching$yearID <- yearSet Pitching = Pitching[,c(24,1,28,3:2,4:7,9:13,15:17,19:18,22,20,21,23,8,14,26:27,25)] #save playerID to Name mapping to insert in later pitchNames = Pitching[,c(1:2)] Pitching$Name<-NULL Pitching$TeamGames<-yearGames #format Fielding data to match Lahman code Fielding$Rk <- NULL Fielding$Rtot <- NULL Fielding$Rdrs <- NULL Fielding$`Rdrs/yr`<- NULL Fielding$Rgood <- NULL Fielding$`Rtot/yr`<- NULL Fielding$Age <- NULL Fielding$yearID <- 2022 Fielding <- rename(Fielding,playerID=`Name-additional`,lgID=Lg,teamID_field=Tm,G_field=G,GS_field=GS) Fielding$POS <- Fielding$`Pos Summary` Fielding$`Pos Summary`<- NULL Fielding$CG <- NULL CatchFielding$Rk <- NULL CatchFielding <- rename(CatchFielding,playerID=`Name-additional`,teamID_field=Tm,SB_catch=SB,CS_catch=CS,WP_field=WP) CatchFielding$Age <- NULL CatchFielding$SBO <- NULL CatchFielding = CatchFielding[,-c(7:20)] Fielding = left_join(Fielding,CatchFielding,by=c("playerID","teamID_field","Name")) Fielding$Ch <- NULL Fielding$`Fld%` <- NULL Fielding$`RF/9`<-NULL Fielding$`RF/G`<-NULL Fielding$Name<-NULL Fielding = Fielding[,c(10:11,2:1,12,3:9,13:16)] #format Appearances data to match Lahman code Appearances$Rk <- NULL Appearances$Age <- NULL Appearances$Yrs <- NULL Appearances$GS <- NULL Appearances$Batting <- NULL Appearances$Defense <- NULL Appearances <- rename(Appearances,playerID=`Name-additional`,teamID_App=Tm,G_all=G,G_p=P,G_c=C,G_1b=`1B`,G_2b=`2B`, G_3b=`3B`,G_ss=SS,G_lf=LF,G_cf=CF,G_rf=RF,G_dh=DH,G_of=OF,G_pr=PR,G_ph=PH) Appearances$yearID=yearSet Appearances$lgID_App <- 'DELETE LATER' Appearances = Appearances[,c(17:18,19,2,1,3:16)] Appearances$Name <- NULL #format Batting data to match Lahman code Batting$Rk <- NULL Batting$`OPS+`<- NULL Batting$yearID <- yearSet Batting$Age <- NULL Batting$TB <- NULL Batting$PA <- NULL Batting$BA <- NULL Batting$OBP <- NULL Batting$SLG <- NULL Batting$OPS <- NULL Batting = Batting[Batting$`Name-additional`!='-9999',] Batting <- rename(Batting,playerID=`Name-additional`,teamID_bat=Tm,lgID=Lg,Doub=`2B`,Trip=`3B`,G_bat=G,R_bat=R, H_bat=H,HR_bat=HR,BB_bat=BB,SO_bat=SO,IBB_bat=IBB,HBP_bat=HBP,SF_bat=SF,SH_bat=SH,GIDP_bat=GDP) Batting <- Batting[,c(22,23,3:2,1,4:15,20,17,18:19,16)] Batting$Name <- NULL #combine Pitching, Fielding, Batting, and Appearances tables total_step1 = left_join(Pitching,Fielding, by = c("yearID","playerID","lgID")) total_step2 = left_join(total_step1,Batting,by=c("yearID","playerID","lgID")) total = left_join(total_step2,Appearances, by = c("yearID","playerID")) total$lgID_App <- NULL #check for NA values sum(is.na(total)) #get rid of NULL and NA values total[total=='NULL']=0 total$teamID_bat = ifelse(is.na(total$teamID_bat),"",total$teamID_bat) total$teamID_field = ifelse(is.na(total$teamID_field),"",total$teamID_field) total$teamID_App = ifelse(is.na(total$teamID_App),"",total$teamID_App) total$POS = ifelse(is.na(total$POS),"?",total$POS) total[is.na(total)]=0 #find % of games played at each position, including SP/RP split total$G_tot = total$G_p +total$G_c+total$G_1b+total$G_2b+total$G_3b+total$G_ss+total$G_lf+total$G_cf+total$G_rf+total$G_dh total$PropPitch = total$G_p/total$G_tot total$PropSP = total$GS/total$G total$PropRP = 1 - total$PropSP total$PropCatch = total$G_c/total$G_tot total$PropFirst = total$G_1b/total$G_tot total$PropSecond = total$G_2b/total$G_tot total$PropThird = total$G_3b/total$G_tot total$PropShort = total$G_ss/total$G_tot total$PropLeft = total$G_lf/total$G_tot total$PropCenter = total$G_cf/total$G_tot total$PropRight = total$G_rf/total$G_tot total$PropOutfield = total$G_of/total$G_tot total$PropDH = total$G_dh/total$G_tot #create columns for other values total$WinPercent = total$W / (total$W+total$L) total$nonHRhits = total$H-total$HR total$IP2 = total$IP total$uBB = total$BB - total$IBB total$otherOut = total$BFP-total$BB-total$HBP-total$SF-total$SH - total$H - total$SO - total$GIDP total$ERA = 9*total$ER/total$IP total$WHIP = (total$BB+total$H)/total$IP total$BA_opp = total$H/(total$BFP-total$BB-total$HBP-total$SF-total$SH) total$OBP_opp = (total$H+total$BB+total$HBP)/(total$BFP-total$SH) total$BABIP_opp = (total$H-total$HR)/(total$BFP-total$BB-total$HBP-total$SH-total$SO-total$HR) total$PitVA = (-.37*total$nonHRhits - 1.29*total$HR - .22*total$uBB-.17*total$IBB-.24*total$HBP+.10*total$SF-.26*total$WP-.25*total$BK+.22*total$SH+.34*total$SO+.75*total$GIDP+.33*total$otherOut)/total$BFP total$Sing = total$H_bat - total$Doub - total$Trip - total$HR_bat total$PA = total$AB + total$BB_bat + total$HBP_bat + total$SF_bat + total$SH_bat total$uBB_bat = total$BB_bat - total$IBB_bat total$Ch = total$PO + total$A + total$E total$otherOut_bat = total$AB - total$H_bat - total$SO_bat - total$GIDP_bat total$SBpercent = total$SB / (total$SB + total$CS) total$BaseVA = (.15*total$SB - .39*total$CS) / (total$SB + total$CS) total$CSpercent = total$CS_catch / (total$CS_catch + total$SB_catch) total$BA = total$H_bat/total$AB total$OBP = (total$H_bat+total$BB_bat+total$HBP_bat)/(total$AB+total$BB_bat+total$HBP_bat+total$SF_bat) total$SLG = (1*total$Sing+2*total$Doub+3*total$Trip+4*total$HR_bat)/total$AB total$OPS = total$OBP + total$SLG total$BatVA = (.22*total$uBB+.17*total$IBB+.24*total$HBP+.30*total$Sing+.58*total$Doub+.91*total$Trip+1.29*total$HR-.10*total$SF-.34*total$SO-.33*total$otherOut-.22*total$SH-.75*total$GIDP)/(total$AB+total$BB+total$HBP+total$SF+total$SH) total$FieldPercent = (total$PO+total$A)/total$Ch total$Inn_field = total$IP total$FieldVA = ifelse(total$SB_catch>0 & total$CS_catch>0, ((.33*total$PO+.33*total$A+.10*total$DP-.26*total$PB-.68*total$E)/(total$Inn_field)) + (.39*total$CS_catch-.15*total$SB_catch)/(total$CS_catch+total$SB_catch), (.33*total$PO+.33*total$A+.10*total$DP-.26*total$PB-.68*total$E)/(total$Inn_field)) total$RFG = (total$PO+total$A)/total$G_tot total$RF9 = 9*(total$PO+total$A)/total$Inn_field #load in wOBA weights for each season to calculate wOBA and FIP library(readxl) wOBAweights <- read_excel("~/Documents/Baseball Stats/Player Value/Yearly_wOBA_weights.xlsx") wOBAweights$yearID <- wOBAweights$Season wOBAweights$Season <- NULL wOBAweights$wOBA <- NULL wOBAweights$wOBAScale <- NULL total <- left_join(total,wOBAweights,by="yearID") total$wOBA <- (total$wBB*total$uBB_bat+total$wHBP*total$HBP_bat+total$w1B*total$Sing+total$w2B*total$Doub+total$w3B*total$Trip+total$wHR*total$HR_bat)/(total$AB+total$BB_bat-total$IBB_bat+total$SF_bat+total$HBP_bat) total$FIP <- total$cFIP+((13*total$HR+3*total$BB+3*total$HBP-2*total$SO)/total$IP) #get rid of now unneeded wOBA weight columns total$wBB <- NULL total$wHBP <- NULL total$w1B <- NULL total$w2B <- NULL total$w3B <- NULL total$wHR <- NULL total$runSB <- NULL total$runCS <- NULL total$cFIP <- NULL #set NA values to 0 total[is.na(total)] = 0 #round rate values total$WinPercent <- round(total$WinPercent,digits=3) total$ERA <- round(total$ERA,digits=2) total$WHIP <- round(total$WHIP,digits=3) total$BA_opp <- round(total$BA_opp,digits=3) total$OBP_opp <- round(total$OBP_opp,digits=3) total$BABIP_opp <- round(total$OBP_opp,digits=3) total$FIP <- round(total$FIP,digits=2) total$PitVA <- round(total$PitVA,digits=4) total$BA <- round(total$BA,digits=3) total$OBP <- round(total$OBP,digits=3) total$SLG <- round(total$SLG,digits=3) total$OPS <- round(total$OPS,digits=3) total$wOBA <- round(total$wOBA,digits=3) total$BatVA <- round(total$BatVA,digits=4) total$SBpercent <- round(total$SBpercent,digits=4) total$BaseVA <- round(total$BaseVA,digits=4) total$CSpercent <- round(total$CSpercent,digits=4) total$FieldPercent <- round(total$FieldPercent,digits=3) total$FieldVA <- round(total$FieldVA,digits=4) total$RFG <- round(total$RFG,digits=2) total$RF9 <- round(total$RF9,digits=2) total$PropPitch <- round(total$PropPitch,digits=4) total$PropSP <- round(total$PropSP,digits=4) total$PropRP <- round(total$PropRP,digits=4) total$PropCatch <- round(total$PropCatch,digits=4) total$PropFirst <- round(total$PropFirst,digits=4) total$PropSecond <- round(total$PropSecond,digits=4) total$PropThird <- round(total$PropThird,digits=4) total$PropShort <- round(total$PropShort,digits=4) total$PropLeft <- round(total$PropLeft,digits=4) total$PropCenter <- round(total$PropCenter,digits=4) total$PropRight <- round(total$PropRight,digits=4) total$PropOutfield <- round(total$PropOutfield,digits=4) total$PropDH <- round(total$PropDH,digits=4) total$Inn_field <- round(total$Inn_field,digits=3) total$IP <- round(total$IP,digits=3) #reorder columns #move up ERA to be after count pitching stats total = total[,c(1:27,94,28:93,95:119)] #move up WHIP, BA_opp, OBP_opp, BABIP_opp, FIP, and PitVA to be after ERA total = total[,c(1:28,95:98,119,99,29:94,100:118)] #move up winning percentage to be after wins and losses total = total[,c(1:6,96,7:95,97:119)] #move up uBB to be after walks total = total[,c(1:17,99,18:98,100:119)] #move up IP to be before IPouts total = total[,c(1:12,99,13:98,100:119)] #move up nonHR hits to be after hits total = total[,c(1:15,99,16:98,100:119)] #move up other outs to be after GIDP total = total[,c(1:31,100,32:99,101:119)] #move up singles to be after batting hits total = total[,c(1:58,101,59:100,102:119)] #move up PAs to be before ABs total = total[,c(1:55,102,56:101,103:119)] #move up batting uBBs to be after batting walks total = total[,c(1:67,103,68:102,104:119)] #move up defensive chances to be before putouts and assists total = total[,c(1:45,104,46:103,105:119)] #move up batting other outs to be after batting GIDP total = total[,c(1:75,105,76:104,106:119)] #move up stolen base percentage and BaseVA to be after SB and CS total = total[,c(1:67,106:107,68:105,108:119)] #move up batting BA, OBP, SLG, OPS, wOBA, and BatVA to be after counting batting stats total = total[,c(1:78,109:112,119,113,79:108,114:118)] #move up catcher CS percent to be after catcher SB and CS total = total[,c(1:54,114,55:113,115:119)] #move up innings in field to be before inning outs in field total = total[,c(1:44,116,45:115,117:119)] #move up fielding rate stats to be after other fielding stats total = total[,c(1:56,116:119,57:115)] #add in player first and last names pitchNames$Name2 = pitchNames$Name final = left_join(total,pitchNames,by="playerID") #put names at front and output file final = final[,c(1,121,120,2:119)] write.csv(final,"~/Documents/Baseball Stats/Player Value/2022/PitchRawCombData.csv") #filter to players in 2022 and add columns for relative 1Q values #focus on only mainly pitchers, not position players that pitched pitchers = filter(final,yearID==yearSet,PropPitch>=.15) #add baserunner SB attempts and catcher CS opps and reorder columns and remove NAs pitchers$SB_att = pitchers$SB + pitchers$CS pitchers$CSopps = pitchers$CS_catch + pitchers$SB_catch pitchers = pitchers[,c(1:57,123,58:75,122,76:121)] pitchers[is.na(pitchers)] = 0 #get player per PA values and per Chance/Opportunity Values pitchers$HperInn = pitchers$H/pitchers$IP pitchers$nonHRhitsPerInn = pitchers$nonHRhits/pitchers$IP pitchers$ERperInn = pitchers$ER/pitchers$IP pitchers$HRperInn = pitchers$HR/pitchers$IP pitchers$BBperInn = pitchers$BB/pitchers$IP pitchers$uBBperInn = pitchers$uBB/pitchers$IP pitchers$SOperInn = pitchers$SO/pitchers$IP pitchers$IBBperInn = pitchers$IBB/pitchers$IP pitchers$WPperInn = pitchers$WP/pitchers$IP pitchers$HBPperInn = pitchers$HBP/pitchers$IP pitchers$BKperInn = pitchers$BK/pitchers$IP pitchers$BattersPerInn = pitchers$BFP/pitchers$IP pitchers$RperInn = pitchers$R/pitchers$IP pitchers$SHperInn = pitchers$SH/pitchers$IP pitchers$SFperInn = pitchers$SF/pitchers$IP pitchers$GIDPperInn = pitchers$GIDP/pitchers$IP pitchers$otherOutPerInn = pitchers$otherOut/pitchers$IP pitchers$ChperInn = pitchers$Ch/pitchers$Inn_field pitchers$POperInn = pitchers$PO/(pitchers$Inn_field) pitchers$AperInn = pitchers$A/(pitchers$Inn_field) pitchers$EperInn = pitchers$E/(pitchers$Inn_field) pitchers$DPperInn = pitchers$DP/(pitchers$Inn_field) pitchers$PBperInn = pitchers$PB/(pitchers$Inn_field) pitchers$SB_catchPerOpp = pitchers$SB_catch/(pitchers$CSopps) pitchers$CS_catchPerOpp = pitchers$CS_catch/(pitchers$CSopps) pitchers$CSoppsPerInn = pitchers$CSopps/pitchers$Inn_field pitchers$ABperPA = pitchers$AB/pitchers$PA pitchers$RperPA = pitchers$R_bat/pitchers$PA pitchers$HperPA = pitchers$H_bat/pitchers$PA pitchers$SingPerPA = pitchers$Sing/pitchers$PA pitchers$DoubPerPA = pitchers$Doub/pitchers$PA pitchers$TripPerPA = pitchers$Trip/pitchers$PA pitchers$HRperPA = pitchers$HR_bat/pitchers$PA pitchers$RBIperPA = pitchers$RBI/pitchers$PA pitchers$SBperAtt = pitchers$SB/pitchers$SB_att pitchers$CSperAtt = pitchers$CS/pitchers$SB_att pitchers$SB_attPerPA = pitchers$SB_att/pitchers$PA pitchers$BBperPA = pitchers$BB_bat/pitchers$PA pitchers$uBBperPA = pitchers$uBB_bat/pitchers$PA pitchers$SOperPA = pitchers$SO_bat/pitchers$PA pitchers$IBBperPA = pitchers$IBB_bat/pitchers$PA pitchers$HBPperPA = pitchers$HBP_bat/pitchers$PA pitchers$SHperPA = pitchers$SH_bat/pitchers$PA pitchers$SFperPA = pitchers$SF_bat/pitchers$PA pitchers$GIDPperPA = pitchers$GIDP_bat/pitchers$PA pitchers$otherOutPerPA = pitchers$otherOut_bat/pitchers$PA #get rid of NaN values due to dividing by 0 PAs or Innings pitchers[is.na(pitchers)] = 0 #convert pitchers to a dataframe pitchers = as.data.frame(pitchers) #get 1st quartile values for SP and RP in 2010 for (i in yearSet:yearSet){ seasonGames = mean(pitchers$TeamGames) starter = filter(pitchers,yearID==i,IP>=seasonGames*9*.095,PropSP>=.7) reliever = filter(pitchers,yearID==i,IP>=seasonGames*9*.02375,PropRP>=.7) quartiles = pitchers[,c(7:42,45:63,65:94,96:169)] quartiles = quartiles[-(1:length(quartiles$W)),] for (j in 1:36){ quartiles[1,j] = quantile(starter[,j+6],baselineLevel)[[1]] quartiles[2,j] = quantile(reliever[,j+6],baselineLevel)[[1]] } for (j in 37:55){ quartiles[1,j] = quantile(starter[,j+8],baselineLevel)[[1]] quartiles[2,j] = quantile(reliever[,j+8],baselineLevel)[[1]] } for (j in 56:85){ quartiles[1,j] = quantile(starter[,j+9],baselineLevel)[[1]] quartiles[2,j] = quantile(reliever[,j+9],baselineLevel)[[1]] } for (j in 86:159){ quartiles[1,j] = quantile(starter[,j+10],baselineLevel)[[1]] quartiles[2,j] = quantile(reliever[,j+10],baselineLevel)[[1]] } quartiles$yearID = i } quartiles$POS = c("SP","RP") quartiles = quartiles[,c(160,161,1:159)] #add columns for relative 1Q values pitchers$HperInn1Q = 1 pitchers$nonHRhitsPerInn1Q = 1 pitchers$ERperInn1Q = 1 pitchers$HRperInn1Q = 1 pitchers$BBperInn1Q = 1 pitchers$uBBperInn1Q = 1 pitchers$SOperInn1Q = 1 pitchers$IBBperInn1Q = 1 pitchers$WPperInn1Q = 1 pitchers$HBPperInn1Q = 1 pitchers$BKperInn1Q = 1 pitchers$BattersPerInn1Q = 1 pitchers$RperInn1Q = 1 pitchers$SHperInn1Q = 1 pitchers$SFperInn1Q = 1 pitchers$GIDPperInn1Q = 1 pitchers$otherOutPerInn1Q = 1 pitchers$ChperInn1Q = 1 pitchers$POperInn1Q = 1 pitchers$AperInn1Q = 1 pitchers$EperInn1Q = 1 pitchers$DPperInn1Q = 1 pitchers$PBperInn1Q = 1 pitchers$SB_catchPerOpp1Q = 1 pitchers$CS_catchPerOpp1Q = 1 pitchers$CSoppsPerInn1Q = 1 pitchers$ABperPA1Q = 1 pitchers$RperPA1Q = 1 pitchers$HperPA1Q = 1 pitchers$SingPerPA1Q = 1 pitchers$DoubPerPA1Q = 1 pitchers$TripPerPA1Q = 1 pitchers$HRperPA1Q = 1 pitchers$RBIperPA1Q = 1 pitchers$SBperAtt1Q = 1 pitchers$CSperAtt1Q = 1 pitchers$SB_attPerPA1Q = 1 pitchers$BBperPA1Q = 1 pitchers$uBBperPA1Q = 1 pitchers$SOperPA1Q = 1 pitchers$IBBperPA1Q = 1 pitchers$HBPperPA1Q = 1 pitchers$SHperPA1Q = 1 pitchers$SFperPA1Q = 1 pitchers$GIDPperPA1Q = 1 pitchers$otherOutPerPA1Q = 1 #find each player's first quartile values based on their position distributions for (n in 1:length(pitchers$playerID)){ for (k in 170:215){ pitchers[n,k] = quartiles[1,k-54]*pitchers[n,"PropSP"]+quartiles[2,k-54]*pitchers[n,"PropRP"] } } #find each player's quartile rate values pitchers$ERA1Q = 1 pitchers$WHIP1Q = 1 pitchers$BA_opp1Q = 1 pitchers$OBP_opp1Q = 1 pitchers$BABIP_opp1Q = 1 pitchers$FIP1Q = 1 pitchers$PitVA1Q = 1 pitchers$CSpercent1Q = 1 pitchers$FieldPercent1Q = 1 pitchers$FieldVA1Q = 1 pitchers$RFG1Q = 1 pitchers$RF91Q = 1 pitchers$SBpercent1Q = 1 pitchers$BaseVA1Q = 1 pitchers$BA1Q = 1 pitchers$OBP1Q = 1 pitchers$SLG1Q = 1 pitchers$OPS1Q = 1 pitchers$wOBA1Q = 1 pitchers$BatVA1Q = 1 #populate player first quartile rate values for (n in 1:length(pitchers$playerID)){ pitchers[n,"ERA1Q"] = quartiles[1,"ERA"]*pitchers[n,"PropSP"]+quartiles[2,"ERA"]*pitchers[n,"PropRP"] pitchers[n,"WHIP1Q"] = quartiles[1,"WHIP"]*pitchers[n,"PropSP"]+quartiles[2,"WHIP"]*pitchers[n,"PropRP"] pitchers[n,"BA_opp1Q"] = quartiles[1,"BA_opp"]*pitchers[n,"PropSP"]+quartiles[2,"BA_opp"]*pitchers[n,"PropRP"] pitchers[n,"OBP_opp1Q"] = quartiles[1,"OBP_opp"]*pitchers[n,"PropSP"]+quartiles[2,"OBP_opp"]*pitchers[n,"PropRP"] pitchers[n,"BABIP_opp1Q"] = quartiles[1,"BABIP_opp"]*pitchers[n,"PropSP"]+quartiles[2,"BABIP_opp"]*pitchers[n,"PropRP"] pitchers[n,"FIP1Q"] = quartiles[1,"FIP"]*pitchers[n,"PropSP"]+quartiles[2,"FIP"]*pitchers[n,"PropRP"] pitchers[n,"PitVA1Q"] = quartiles[1,"PitVA"]*pitchers[n,"PropSP"]+quartiles[2,"PitVA"]*pitchers[n,"PropRP"] pitchers[n,"CSpercent1Q"] = quartiles[1,"CSpercent"]*pitchers[n,"PropSP"]+quartiles[2,"CSpercent"]*pitchers[n,"PropRP"] pitchers[n,"FieldPercent1Q"] = quartiles[1,"FieldPercent"]*pitchers[n,"PropSP"]+quartiles[2,"FieldPercent"]*pitchers[n,"PropRP"] pitchers[n,"FieldVA1Q"] = quartiles[1,"FieldVA"]*pitchers[n,"PropSP"]+quartiles[2,"FieldVA"]*pitchers[n,"PropRP"] pitchers[n,"RFG1Q"] = quartiles[1,"RFG"]*pitchers[n,"PropSP"]+quartiles[2,"RFG"]*pitchers[n,"PropRP"] pitchers[n,"RF91Q"] = quartiles[1,"RF9"]*pitchers[n,"PropSP"]+quartiles[2,"RF9"]*pitchers[n,"PropRP"] pitchers[n,"SBpercent1Q"] = quartiles[1,"SBpercent"]*pitchers[n,"PropSP"]+quartiles[2,"SBpercent"]*pitchers[n,"PropRP"] pitchers[n,"BaseVA1Q"] = quartiles[1,"BaseVA"]*pitchers[n,"PropSP"]+quartiles[2,"BaseVA"]*pitchers[n,"PropRP"] pitchers[n,"BA1Q"] = quartiles[1,"BA"]*pitchers[n,"PropSP"]+quartiles[2,"BA"]*pitchers[n,"PropRP"] pitchers[n,"OBP1Q"] = quartiles[1,"OBP"]*pitchers[n,"PropSP"]+quartiles[2,"OBP"]*pitchers[n,"PropRP"] pitchers[n,"SLG1Q"] = quartiles[1,"SLG"]*pitchers[n,"PropSP"]+quartiles[2,"SLG"]*pitchers[n,"PropRP"] pitchers[n,"OPS1Q"] = quartiles[1,"OPS"]*pitchers[n,"PropSP"]+quartiles[2,"OPS"]*pitchers[n,"PropRP"] pitchers[n,"wOBA1Q"] = quartiles[1,"wOBA"]*pitchers[n,"PropSP"]+quartiles[2,"wOBA"]*pitchers[n,"PropRP"] pitchers[n,"BatVA1Q"] = quartiles[1,"BatVA"]*pitchers[n,"PropSP"]+quartiles[2,"BatVA"]*pitchers[n,"PropRP"] } #reorder the quartile rate values to be by the other rate values pitchers = pitchers[,c(1:35,216,36,217,37,218,38,219,39,220,40,221,41,222,42:59,223,60,224,61,225,62,226,63,227,64:78,228,79,229,80:89,230,90,231,91,232,92,233,93,234,94,235,95:215)] #get rid of NaN values due to dividing by 0 PAs or Innings pitchers[is.na(pitchers)] = 0 #get player per PA or Inn values above/below the 1st quartile pitchers$HabovePerInn= pitchers$HperInn - pitchers$HperInn1Q pitchers$nonHRhitsAbovePerInn= pitchers$nonHRhitsPerInn - pitchers$nonHRhitsPerInn1Q pitchers$ERabovePerInn = pitchers$ERperInn - pitchers$ERperInn1Q pitchers$HRabovePerInn = pitchers$HRperInn - pitchers$HRperInn1Q pitchers$BBabovePerInn = pitchers$BBperInn - pitchers$BBperInn1Q pitchers$uBBabovePerInn = pitchers$uBBperInn - pitchers$uBBperInn1Q pitchers$SOabovePerInn = pitchers$SOperInn - pitchers$SOperInn1Q pitchers$IBBabovePerInn = pitchers$IBBperInn - pitchers$IBBperInn1Q pitchers$WPabovePerInn = pitchers$WPperInn - pitchers$WPperInn1Q pitchers$HBPabovePerInn = pitchers$HBPperInn - pitchers$HBPperInn1Q pitchers$BKabovePerInn = pitchers$BKperInn - pitchers$BKperInn1Q pitchers$BattersAbovePerInn = pitchers$BattersPerInn - pitchers$BattersPerInn1Q pitchers$RabovePerInn = pitchers$RperInn - pitchers$RperInn1Q pitchers$SHabovePerInn = pitchers$SHperInn - pitchers$SHperInn1Q pitchers$SFabovePerInn = pitchers$SFperInn - pitchers$SFperInn1Q pitchers$GIDPabovePerInn = pitchers$GIDPperInn - pitchers$GIDPperInn1Q pitchers$otherOutAbovePerInn = pitchers$otherOutPerInn - pitchers$otherOutPerInn1Q pitchers$ABabovePerPA = pitchers$ABperPA - pitchers$ABperPA1Q pitchers$RabovePerPA = pitchers$RperPA - pitchers$RperPA1Q pitchers$HabovePerPA = pitchers$HperPA - pitchers$HperPA1Q pitchers$SingAbovePerPA = pitchers$SingPerPA - pitchers$SingPerPA1Q pitchers$DoubAbovePerPA = pitchers$DoubPerPA - pitchers$DoubPerPA1Q pitchers$TripAbovePerPA = pitchers$TripPerPA - pitchers$TripPerPA1Q pitchers$HRabovePerPA = pitchers$HRperPA - pitchers$HRperPA1Q pitchers$RBIabovePerPA = pitchers$RBIperPA - pitchers$RBIperPA1Q pitchers$SBabovePerAtt = pitchers$SBperAtt - pitchers$SBperAtt1Q pitchers$CSabovePerAtt = pitchers$CSperAtt - pitchers$CSperAtt1Q pitchers$SBattAbovePerPA = pitchers$SB_attPerPA - pitchers$SB_attPerPA1Q pitchers$BBabovePerPA = pitchers$BBperPA - pitchers$BBperPA1Q pitchers$uBBabovePerPA = pitchers$uBBperPA - pitchers$uBBperPA1Q pitchers$SOabovePerPA = pitchers$SOperPA - pitchers$SOperPA1Q pitchers$IBBabovePerPA = pitchers$IBBperPA - pitchers$IBBperPA1Q pitchers$HBPabovePerPA = pitchers$HBPperPA - pitchers$HBPperPA1Q pitchers$SHabovePerPA = pitchers$SHperPA - pitchers$SHperPA1Q pitchers$SFabovePerPA = pitchers$SFperPA - pitchers$SFperPA1Q pitchers$GIDPabovePerPA = pitchers$GIDPperPA - pitchers$GIDPperPA1Q pitchers$otherOutAbovePerPA = pitchers$otherOutPerPA - pitchers$otherOutPerPA1Q pitchers$ChAbovePerInn = pitchers$ChperInn - pitchers$ChperInn1Q pitchers$POabovePerInn = pitchers$POperInn - pitchers$POperInn1Q pitchers$AabovePerInn = pitchers$AperInn - pitchers$AperInn1Q pitchers$EabovePerInn = pitchers$EperInn - pitchers$EperInn1Q pitchers$DPabovePerInn = pitchers$DPperInn - pitchers$DPperInn1Q pitchers$PBabovePerInn = pitchers$PBperInn - pitchers$PBperInn1Q pitchers$SB_catchAbovePerOpp = pitchers$SB_catchPerOpp - pitchers$SB_catchPerOpp1Q pitchers$CS_catchAbovePerOpp = pitchers$CS_catchPerOpp - pitchers$CS_catchPerOpp1Q pitchers$CSoppsAbovePerInn = pitchers$CSoppsPerInn - pitchers$CSoppsPerInn1Q #get above/below values based on the # of player PAs or Ch Inns pitchers$Habove = pitchers$HabovePerInn*pitchers$IP pitchers$nonHRhitsAbove = pitchers$nonHRhitsAbovePerInn*pitchers$IP pitchers$ERabove = pitchers$ERabovePerInn*pitchers$IP pitchers$HRabove = pitchers$HRabovePerInn*pitchers$IP pitchers$BBabove = pitchers$BBabovePerInn*pitchers$IP pitchers$uBBabove = pitchers$uBBabovePerInn*pitchers$IP pitchers$SOabove = pitchers$SOabovePerInn*pitchers$IP pitchers$IBBabove = pitchers$IBBabovePerInn*pitchers$IP pitchers$WPabove = pitchers$WPabovePerInn*pitchers$IP pitchers$HBPabove = pitchers$HBPabovePerInn*pitchers$IP pitchers$BKabove = pitchers$BKabovePerInn*pitchers$IP pitchers$BattersAbove = pitchers$BattersAbovePerInn*pitchers$IP pitchers$Rabove = pitchers$RabovePerInn*pitchers$IP pitchers$SHabove = pitchers$SHabovePerInn*pitchers$IP pitchers$SFabove = pitchers$SFabovePerInn*pitchers$IP pitchers$GIDPabove = pitchers$GIDPabovePerInn*pitchers$IP pitchers$otherOutAbove = pitchers$otherOutAbovePerInn*pitchers$IP pitchers$ABabove = pitchers$ABabovePerPA*pitchers$PA pitchers$R_batAbove = pitchers$RabovePerPA*pitchers$PA pitchers$H_batAbove = pitchers$HabovePerPA*pitchers$PA pitchers$SingAbove = pitchers$SingAbovePerPA*pitchers$PA pitchers$DoubAbove = pitchers$DoubAbovePerPA*pitchers$PA pitchers$TripAbove = pitchers$TripAbovePerPA*pitchers$PA pitchers$HR_batAbove = pitchers$HRabovePerPA*pitchers$PA pitchers$RBIabove = pitchers$RBIabovePerPA*pitchers$PA pitchers$SBabove = pitchers$SBabovePerAtt*pitchers$SB_att pitchers$CSabove = pitchers$CSabovePerAtt*pitchers$SB_att pitchers$SBattAbove = pitchers$SBattAbovePerPA*pitchers$PA pitchers$BB_batAbove = pitchers$BBabovePerPA*pitchers$PA pitchers$uBB_batAbove = pitchers$uBBabovePerPA*pitchers$PA pitchers$SO_batAbove = pitchers$SOabovePerPA*pitchers$PA pitchers$IBB_batAbove = pitchers$IBBabovePerPA*pitchers$PA pitchers$HBP_batAbove = pitchers$HBPabovePerPA*pitchers$PA pitchers$SH_batAbove = pitchers$SHabovePerPA*pitchers$PA pitchers$SF_batAbove = pitchers$SFabovePerPA*pitchers$PA pitchers$GIDP_batAbove = pitchers$GIDPabovePerPA*pitchers$PA pitchers$otherOut_batAbove = pitchers$otherOutAbovePerPA*pitchers$PA pitchers$ChAbove = pitchers$ChAbovePerInn*pitchers$Inn_field pitchers$POabove = pitchers$POabovePerInn*pitchers$Inn_field pitchers$Aabove = pitchers$AabovePerInn*pitchers$Inn_field pitchers$Eabove = pitchers$EabovePerInn*pitchers$Inn_field pitchers$DPabove = pitchers$DPabovePerInn*pitchers$Inn_field pitchers$PBabove = pitchers$PBabovePerInn*pitchers$Inn_field pitchers$SB_catchAbove = pitchers$SB_catchAbovePerOpp*pitchers$CSopps pitchers$CS_catchAbove = pitchers$CS_catchAbovePerOpp*pitchers$CSopps pitchers$CSoppsAbove = pitchers$CSoppsAbovePerInn*pitchers$Inn_field #adjusted putouts and assists for difficulty #estimate that on assisted putouts, the assist is worth 80% of the out and the putout is worth 20% #only dock C and 1B for assisted (or SO) putouts, since they get the most #must dock by % of time at first, but must also use relative frequency (don't include DH) #divide into 1B or C (relative to DH) piece with unassisted/nonstrikeout POs, #1B or C (relative to DH) piece with assisted/strikeout POs, #and non-1B or C (relative to DH) piece pitchers$Adj1BAssistPOs = pitchers$POabove*(pitchers$PropFirst/(1-pitchers$PropDH))*.9 pitchers$Adj1BUnassistPOs = pitchers$POabove*(pitchers$PropFirst/(1-pitchers$PropDH))*.1 pitchers$AdjCstrikeoutPOs = pitchers$POabove*(pitchers$PropCatch/(1-pitchers$PropDH))*.93 pitchers$AdjCnonstrikeoutPOs = pitchers$POabove*(pitchers$PropCatch/(1-pitchers$PropDH))*.07 pitchers$AdjNon1BorC_POs = pitchers$POabove*(1-((pitchers$PropFirst+pitchers$PropCatch)/(1-pitchers$PropDH))) #determine total values based on run value weights #round weights to 2 decimals #pitchers can't/don't hit or run bases anymore, so set to 0 pitchers$BattingValue = 0 pitchers$BaserunningValue = 0 pitchers$PitchingValue = -1.29*pitchers$HRabove - .37*pitchers$nonHRhitsAbove -.22*pitchers$uBBabove -.24*pitchers$HBPabove +.22*pitchers$SHabove +.10*pitchers$SFabove +.75*pitchers$GIDPabove+.34*pitchers$SOabove+.33*pitchers$otherOutAbove-.26*pitchers$WPabove - .25*pitchers$BKabove pitchers$FieldingValue = .33*pitchers$AdjNon1BorC_POs + .33*pitchers$Adj1BUnassistPOs + .33*.2*pitchers$Adj1BAssistPOs + .33*pitchers$AdjCnonstrikeoutPOs + .33*.33*pitchers$AdjCstrikeoutPOs +.33*.8*pitchers$Aabove - .68*pitchers$Eabove - .26*pitchers$PBabove + .39*pitchers$CS_catchAbove - .15*pitchers$SB_catchAbove + .1*pitchers$DPabove pitchers[is.na(pitchers)] = 0 pitchers$TotalValue = pitchers$BattingValue + pitchers$BaserunningValue + pitchers$PitchingValue + pitchers$FieldingValue #reorder columns and get rid of NAs pitchers = pitchers[,c(1:6,333:337,7:332)] pitchers[is.na(pitchers)] = 0 pitchers$Name2 <- NULL #create file for Excel write.csv(pitchers,"~/Documents/Baseball Stats/Player Value/2022/2022PitcherValues.csv") write.csv(quartiles,"~/Documents/Baseball Stats/Player Value/2022/2022quartiles_P.csv") write.csv(starter,"~/Documents/Baseball Stats/Player Value/2022/2022starters.csv") write.csv(reliever,"~/Documents/Baseball Stats/Player Value/2022/2022relievers.csv") #group players by their primary position for (i in 1:length(pitchers$playerID)){ if (pitchers$PropSP[i]>=.5){ pitchers$MainPOS[i]="SP" } if (pitchers$PropRP[i]>=.5){ pitchers$MainPOS[i]="RP" } } pitchers$MainPOS = as.factor(pitchers$MainPOS) #plot total value by position ggplot(data = pitchers, mapping = aes(x = MainPOS, y = TotalValue)) + geom_boxplot() + labs(x = 'Position', y = 'Total Value') #plot pitching value by position ggplot(data = pitchers, mapping = aes(x = MainPOS, y =PitchingValue)) + geom_boxplot() + labs(x = 'Position', y = 'Pitching Value') #plot fielding value by position ggplot(data = pitchers, mapping = aes(x = MainPOS, y =FieldingValue)) + geom_boxplot() + labs(x = 'Position', y = 'Fielding Value') #aggregate teams pitchers$oneTeam = T pitchers$oneTeam = ifelse(pitchers$teamID=='TOT',F,T) oneTeam = filter(pitchers,oneTeam==T & (MainPOS=='SP' | MainPOS=='RP')) oneTeam$teamID = as.factor(oneTeam$teamID) #plot total value by team ggplot(data = oneTeam, mapping = aes(x = teamID, y =TotalValue)) + geom_boxplot() + labs(x = 'Team', y = 'Total Value') #get total values by team oneTeam = as.data.frame(oneTeam) oneTeam %>% group_by(teamID) %>% summarise(Means = mean(TotalValue), Medians = median(TotalValue), Freq = n()) %>% print(n=30) #plot pitching value by team ggplot(data = oneTeam, mapping = aes(x = teamID, y =PitchingValue)) + geom_boxplot() + labs(x = 'Team', y = 'Pitching Value') #get pitching values by team oneTeam = as.data.frame(oneTeam) oneTeam %>% group_by(teamID) %>% summarise(Means = mean(PitchingValue), Medians = median(PitchingValue), Freq = n()) %>% print(n=30) #plot fielding value by team ggplot(data = oneTeam, mapping = aes(x = teamID, y =FieldingValue)) + geom_boxplot() + labs(x = 'Team', y = 'Fielding Value') #get fielding values by team oneTeam = as.data.frame(oneTeam) oneTeam %>% group_by(teamID) %>% summarise(Means = mean(FieldingValue), Medians = median(FieldingValue), Freq = n()) %>% print(n=30) #MLB by position ggplot(pitchers, aes(x = FieldingValue, y = PitchingValue, color = factor(MainPOS))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(pitchers$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(pitchers$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(pitchers$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(pitchers$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Position, MLB")+ coord_cartesian(xlim=c(-5,10),ylim=c(-40,25)) #MLB by team ggplot(oneTeam, aes(x = FieldingValue, y = PitchingValue, color = factor(teamID))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(pitchers$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(pitchers$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(pitchers$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(pitchers$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Team, MLB")+ coord_cartesian(xlim=c(-5,10),ylim=c(-40,25)) AL = filter(oneTeam,lgID=='AL') NL = filter(oneTeam,lgID=='NL') #AL by position ggplot(AL, aes(x = FieldingValue, y = PitchingValue, color = factor(MainPOS))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(AL$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(AL$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(AL$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(AL$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Position, AL")+ coord_cartesian(xlim=c(-5,10),ylim=c(-40,25)) #AL by team ggplot(AL, aes(x = FieldingValue, y = PitchingValue, color = factor(teamID))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(AL$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(AL$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(AL$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(AL$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Team, AL")+ coord_cartesian(xlim=c(-5,10),ylim=c(-40,25)) #NL by position ggplot(NL, aes(x = FieldingValue, y = PitchingValue, color = factor(MainPOS))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(NL$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(NL$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(NL$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(NL$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Position, NL")+ coord_cartesian(xlim=c(-5,10),ylim=c(-40,25)) #NL by team ggplot(NL, aes(x = FieldingValue, y = PitchingValue, color = factor(teamID))) + geom_point() + geom_abline(intercept=0,slope=-1) + geom_abline(intercept=quantile(NL$TotalValue,.95),slope=-1,color="green") + geom_abline(intercept=quantile(NL$TotalValue,.99),slope=-1,color="darkgreen") + geom_abline(intercept=quantile(NL$TotalValue,.05),slope=-1,color="red") + geom_abline(intercept=quantile(NL$TotalValue,.01),slope=-1,color="darkred")+ labs(title="Player Value By Team, NL")+ coord_cartesian(xlim=c(-5,10),ylim=c(-40,25)) summary(pitchers$TotalValue) 9*30 sum(pitchers$TotalValue>=5)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=10)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=15)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=20)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=25)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=30)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=35)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=40)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=45)/length(pitchers$TotalValue) sum(pitchers$TotalValue>=50)/length(pitchers$TotalValue) quantile(pitchers$TotalValue,.99) quantile(pitchers$TotalValue,.01)