##################################################################################################
################# Author:              WIK                      ##################################
#################           Calculation of Malmquist DEA        ##################################
#################                Electricity DSOs (VNB)         ##################################
#################              Frontier Shift RP23              ##################################
#################      based on efficiency benchmarking         ##################################
############     based on data delivered by BNetzA 2018/10/18   ##################################
##################################################################################################


##################################################################################################
###############################    Preparation                  ##################################
##################################################################################################

### Load special libraries

# Read- and write-options
#options(java.parameters="-Xmx8g")
#library(XLConnectJars)
library(XLConnect)

# Bogetoft/Otto-packages
library(lpSolveAPI)
library(ucminf)
library(Benchmarking)

### Clear workspace and keep track of initial workdir
rm(list=ls())
getwd()
setwd("..")

### User definitions
setwd("Daten")
source<-"PFG4_Analysen_Daten_VNB.xlsx"
sheet<-"VNB-RP23"

# NBoK in DEA einschließen? "0" = "ausschließen", "1" = "einschließen"
NBoK <- 0

# only those DMUs existent in all periods
# case1: dins23_v1 == 1
# case2: dins23_v2 == 1

case<-c("case1","case2")

### cost variable
C<-c("N_totex","N_stotex")

Modell<-c("RP2", "RP3")

### output variables
#RP2: 
RP2 <- c("N2_jhlaus",	"N2_vfl",	"N2_nltot",	"N2_aptot",	"N2_paptot",	"N2_rvtot",	"N2_mstot",	"N2_apfern",	"N2_avbk456")
#RP3:
RP3 <- c("N3_jhlaus",	"N3_rvtot",	"N3_apg5b", "N3_avbk456",	"N3_mstot")


### Orientation ("in", "out")
Orientation="in" 

### number of regulatory periods
### for RP23 adjust source file accordingly
periods<-2 


### name of variable capturing time index 
Time_index<-"rp" 

### Outlier detection with trimming 

##################################################################################################
##################################################################################################


##################################################################################################
###################################           Data Import         ################################
##################################################################################################

options(java.parameters="-Xmx8g")
.Workbook<-loadWorkbook(source)

deadata<-readWorksheet(.Workbook,sheet,startRow=1,startCol=1,header=T,rownames=F)

# NBoK in DEA ausschließen
if (NBoK == 0){
  deadata <- subset(deadata, N_kfl > 0)
}

for (a in 1:length(case)) {

for (b in 1:length(C)) {
  
for (g in 1:length(Modell)) {
  
  print(case[a])
  print(C[b])
  print(Modell[g])
  

if (case[a]=="case2") {
  deadataF<-subset(deadata, dins23_v2 == 1) 
  deadataE<-subset(deadata, dins23_v2 == 1) 
} else {
  deadataF<-subset(deadata, dins23_v1 == 1) 
  deadataE<-subset(deadata, dins23_v1 == 1)
}

  if (Modell[g]=="RP2") {
    D<-RP2
    RTS<-"irs"
  } else {
    D<-RP3
    RTS<-"crs"
  }

# subsamples for regulatory periods j

for (j in 1:periods) {
  assign (paste("deadataF",j, sep="_"), subset(deadataF, get(Time_index)==j))
  assign (paste("deadataE",j, sep="_"), subset(deadataE, get(Time_index)==j))
}
  

##################################################################################################
##################################################################################################


##################################################################################################
#################################       Outlier detection      ###################################
##################################################################################################

  ### Outlier detection
  # per period
  
  ####################################   dominance criterion      #################################
  
  for (j in 1:periods) {
    
    n_obs = dim(get(paste("deadataF",j, sep="_"))[C[b]])[1]
    assign(paste("F_test",j,sep="_"), NULL)
    assign(paste("F_outlier",j,sep="_"), NULL)
    
    for ( i in 1:n_obs ) {
      
      if (Orientation=="in") {
        A=Benchmarking::dea(
          get(paste("deadataF",j, sep="_"))[-i,C[b]],
          get(paste("deadataF",j, sep="_"))[-i,D], 
          ORIENTATION=Orientation,RTS=RTS)$eff
      } else {
        A=1/Benchmarking::dea(
          get(paste("deadataF",j, sep="_"))[-i,C[b]],
          get(paste("deadataF",j, sep="_"))[-i,D], 
          ORIENTATION=Orientation,RTS=RTS)$eff
      }
      
      
      # CH:   Cost efficiency, half normal
      # OE:   Output efficiency, exponential
      # OH:   Output efficiency, half normal
      # LOE:  Log of output efficiency, exponential
      # LOH:  Log of output efficiency, half normal
      
      A[is.na(A)]<-1
      
      # Dominance tests: CH, OE, OH, LOE, LOH
      SS1_CH=t(A-1)%*%(A-1)
      
      A_OE<- 1/A
      SS1_OE<- sum(A_OE-1)
      
      A_OH <- 1/A
      SS1_OH <- t(A_OH-1)%*%(A_OH-1) 
      
      A_LOE <- log(1/A)
      SS1_LOE <- sum(A_LOE)
      
      A_LOH<-log(1/A)                                     
      SS1_LOH=t(A_LOH)%*%(A_LOH)
      
      
      if (Orientation=="in") {
        B=Benchmarking::dea(
          get(paste("deadataF",j, sep="_"))[C[b]],
          get(paste("deadataF",j, sep="_"))[D], 
          ORIENTATION=Orientation,RTS=RTS)$eff[-i]
      } else {
        B=1/Benchmarking::dea(
          get(paste("deadataF",j, sep="_"))[C[b]],
          get(paste("deadataF",j, sep="_"))[D], 
          ORIENTATION=Orientation,RTS=RTS)$eff[-i]
      } 
      B[is.na(B)]<-1
      
      SS2_CH=t(B-1)%*%(B-1)   
      
      B_OE<- 1/B
      SS2_OE<- sum(B_OE-1)
      
      B_OH <- 1/B
      SS2_OH <- t(B_OH-1)%*%(B_OH-1) 
      
      B_LOE <- log(1/B)
      SS2_LOE <- sum(B_LOE)
      
      B_LOH<-log(1/B)                                     
      SS2_LOH=t(B_LOH)%*%(B_LOH)
      
      
      E_CH <- pf(SS1_CH/SS2_CH, n_obs-1,n_obs-1)   
      E_OE <- 1-pf(SS2_OE/SS1_OE, 2*(n_obs-1),2*(n_obs-1))
      E_OH <- 1-pf(SS2_OH/SS1_OH, n_obs-1, n_obs-1)
      E_LOE <- 1-pf(SS2_LOE/SS1_LOE, 2*(n_obs-1),2*(n_obs-1))
      E_LOH <- 1-pf(SS2_LOH/SS1_LOH, n_obs-1, n_obs-1)
      
      E<- E_CH
    
    if (E<0.05) {
      assign(paste("F_outlier",j,sep="_"), 
             rbind(get(paste("F_outlier",j,sep="_")),
                   get(paste("deadataF",j,sep="_"))$DMU[i]))
      assign(paste("F_test",j,sep="_"), 
             rbind(get(paste("F_test",j,sep="_")),E))
    } else {
      assign(paste("F_test",j,sep="_"), 
             rbind(get(paste("F_test",j,sep="_")),E))
    }
  }
  
  assign(paste("F_eliminate",j,sep="_"), 
         get(paste("F_outlier",j,sep="_"))[,1])
  assign(paste("Prob_F_test",j,sep="_"), 
         get(paste("F_test",j,sep="_"))[,1])
}

#################################    super efficiency criterion    ##############################

for (j in 1:periods) {
  
  # if no dominant DMU code fails
  
  if (is.null(get(paste("F_outlier",j,sep="_")))) {
    
    # DEA
    
    assign(paste("dea",j,sep="_"), 
           Benchmarking::dea(get(paste("deadataF",j,sep="_"))[C[b]],
                             get(paste("deadataF",j, sep="_"))[D],
                             ORIENTATION=Orientation,RTS=RTS))
    assign(paste("d_dea",j,sep="_"), 
           get(paste("dea",j,sep="_"))$eff)
    
    # Calculate superefficiencies
    
    assign(paste("dea_sup",j,sep="_"), 
           Benchmarking::sdea(get(paste("deadataF",j, sep="_"))[C[b]],
                              get(paste("deadataF",j, sep="_"))[D], 
                              ORIENTATION=Orientation,RTS=RTS))
    
    if (Orientation=="in") {
      assign(paste("d_dea_sup",j,sep="_"), 
             get(paste("dea_sup",j,sep="_"))$eff)
    } else {
      assign(paste("d_dea_sup",j,sep="_"), 
             1/get(paste("dea_sup",j,sep="_"))$eff)
    } 
    
    assign(paste("q",j,sep="_"), 
           as.matrix(quantile(get(paste("d_dea_sup",j,sep="_")))))
    assign(paste("q_extreme",j,sep="_"), 
           get(paste("q",j,sep="_"))[4,1]
           +1.5*(get(paste("q",j,sep="_"))[4,1]
                 -get(paste("q",j,sep="_"))[2,1]))
    assign(paste("q_outlier",j,sep="_"),
           ifelse(get(paste("d_dea_sup",j,sep="_"))>get(paste("q_extreme",j,sep="_")),1,0))
    
    n_obs = dim(get(paste("deadataF",j, sep="_"))[C[b]])[1]
    assign(paste("q_eliminate",j,sep="_"), NULL)
    
    for ( i in 1:n_obs ) {
      
      if (get(paste("q_outlier",j,sep="_"))[i]==1) {
        assign(paste("q_eliminate",j,sep="_"), 
               cbind(get(paste("q_eliminate",j,sep="_")),
                     get(paste("deadataF",j,sep="_"))$DMU[i])) 
      }
    }
    
  }else{
    
    # New dataset without dominant DMUs
    
    assign(paste("deadata_q",j,sep="_"), 
           subset(deadata,get(Time_index)==j))
    assign(paste("deadata_q",j,sep="_"), 
           get(paste("deadata_q",j,sep="_"))[-c(get(paste("F_eliminate",j,sep="_"))),])
    if (case[a]=="case2") {
      assign(paste("deadata_q",j,sep="_"), 
             subset(get(paste("deadata_q",j,sep="_")), 
                    dins23_v2 == 1))
    } else {
      assign(paste("deadata_q",j,sep="_"), 
             subset(get(paste("deadata_q",j,sep="_")), 
                    dins23_v1 == 1))
    }
    
    
    # DEA without dominant DMUs
    
    assign(paste("dea",j,sep="_"), 
           Benchmarking::dea(get(paste("deadata_q",j,sep="_"))[C[b]],
                             get(paste("deadata_q",j, sep="_"))[D],
                             ORIENTATION=Orientation,RTS=RTS))
    assign(paste("d_dea",j,sep="_"), 
           get(paste("dea",j,sep="_"))$eff)
    
    # Calculate superefficiencies without dominant DMUs
    
    assign(paste("dea_sup",j,sep="_"), 
           Benchmarking::sdea(get(paste("deadata_q",j, sep="_"))[C[b]],
                              get(paste("deadata_q",j, sep="_"))[D], 
                              ORIENTATION=Orientation,RTS=RTS))
    
    if (Orientation=="in") {
      assign(paste("d_dea_sup",j,sep="_"), 
             get(paste("dea_sup",j,sep="_"))$eff)
    } else {
      assign(paste("d_dea_sup",j,sep="_"), 
             1/get(paste("dea_sup",j,sep="_"))$eff)
    } 
    
    assign(paste("q",j,sep="_"), 
           as.matrix(quantile(get(paste("d_dea_sup",j,sep="_")))))
    assign(paste("q_extreme",j,sep="_"), 
           get(paste("q",j,sep="_"))[4,1]
           +1.5*(get(paste("q",j,sep="_"))[4,1]
                 -get(paste("q",j,sep="_"))[2,1]))
    assign(paste("q_outlier",j,sep="_"),
           ifelse(get(paste("d_dea_sup",j,sep="_"))>
                    get(paste("q_extreme",j,sep="_")),1,0))
    
    n_obs = dim(get(paste("deadata_q",j, sep="_"))[C[b]])[1]
    assign(paste("q_eliminate",j,sep="_"), NULL)
    
    for ( i in 1:n_obs ) {
      
      if (get(paste("q_outlier",j,sep="_"))[i]==1) {
        
        assign(paste("q_eliminate",j,sep="_"), 
               cbind(get(paste("q_eliminate",j,sep="_")),
                     get(paste("deadata_q",j,sep="_"))$DMU[i])) 
      }
    }
  }
}

##################################################################################################
##################################################################################################


##################################################################################################
#################################       Malmquist calculations   #################################
##################################################################################################


### Adtusted without outliers

### Trimming

### construct variable "eliminate"

assign(paste("eliminate"),NULL)
assign(paste("outlier_dom"),NULL)
assign(paste("outlier_sup"),NULL)

for (j in 1:periods) {
  
  assign(paste("eliminate"),
         c(get(paste("eliminate")),   
               get(paste("F_eliminate",j,sep="_"))))
  assign(paste("eliminate"),
         c(get(paste("eliminate")),   #ST: s.o.
               get(paste("q_eliminate",j,sep="_"))))
  assign(paste("outlier_dom"),
         cbind(get(paste("outlier_dom")),
               get(paste("F_eliminate",j,sep="_"))))
  assign(paste("outlier_sup"),
         cbind(get(paste("outlier_sup")),
               get(paste("q_eliminate",j,sep="_"))))
}

assign(paste("eliminate"),
       sort(get(paste("eliminate")),
            decreasing=FALSE))
assign(paste("eliminate"),
       unique(get(paste("eliminate"))))

assign(paste("outlier_dom"),
       sort(get(paste("outlier_dom")),
            decreasing=FALSE))
assign(paste("outlier_dom"),
       unique(get(paste("outlier_dom"))))

assign(paste("outlier_sup"),
       sort(get(paste("outlier_sup")),
            decreasing=FALSE))
assign(paste("outlier_sup"),
       unique(get(paste("outlier_sup"))))

### construct dataset without all outliers (dominance + super efficiency)

# if no outlier code fails

if (is.null(get(paste("eliminate")))) {
  
  for (j in 1:periods) {
    
    assign(paste("deadata",j,sep="_"), 
           subset(deadata,get(Time_index)==j))
    if (case[a]=="case2") {
      assign(paste("deadataF",j,sep="_"), 
             subset(get(paste("deadata",j,sep="_")), 
                    dins23_v2 == 1)) 
      assign(paste("deadataE",j,sep="_"), 
             subset(get(paste("deadata",j,sep="_")), 
                    dins23_v2 == 1)) 
    } else {
      assign(paste("deadataF",j,sep="_"), 
             subset(get(paste("deadata",j,sep="_")), 
                    dins23_v1 == 1)) 
      assign(paste("deadataE",j,sep="_"), 
             subset(get(paste("deadata",j,sep="_")), 
                    dins23_v1 == 1)) 
    }
    
  }
  
} else {
  
  for (j in 1:periods) {
    
    assign(paste("eliminate",j,sep="_"), 
           get(paste("eliminate")))
    assign(paste("deadata",j,sep="_"), 
           subset(deadata,get(Time_index)==j))
    assign(paste("deadata",j,sep="_"), 
           get(paste("deadata",j,sep="_"))[-c(get(paste("eliminate",j,sep="_"))),])
    if (case[a]=="case2") {
      assign(paste("deadataF",j,sep="_"), 
             subset(get(paste("deadata",j,sep="_")), 
                    dins23_v2 == 1)) 
      assign(paste("deadataE",j,sep="_"), 
             subset(get(paste("deadata",j,sep="_")), 
                    dins23_v2 == 1)) 
    } else{
      assign(paste("deadataF",j,sep="_"), 
             subset(get(paste("deadata",j,sep="_")), 
                    dins23_v1 == 1)) 
      assign(paste("deadataE",j,sep="_"), 
             subset(get(paste("deadata",j,sep="_")), 
                    dins23_v1 == 1)) 
    }
   
  }
}

### Malmquist
# calculation of eeiciency scores in j to technology in j+1 (with j=1,...,periods)

for( j in 1:(periods-1)) {
  
  if (length(get(paste("eliminate",j+1,sep="_")))>0) {
    
    assign(paste("dea",j,j+1,sep="_"), 
           Benchmarking::dea(get(paste("deadataE",j, sep="_"))[C[b]], 
                             get(paste("deadataE",j, sep="_"))[D],
                             XREF=get(paste("deadataF",j+1, sep="_"))[C[b]],
                             YREF=get(paste("deadataF",j+1, sep="_"))[D],
                             ORIENTATION=Orientation,RTS=RTS))
    
  } else {
    
    assign(paste("dea",j,j+1,sep="_"), 
           Benchmarking::dea(get(paste("deadataE",j, sep="_"))[C[b]], 
                             get(paste("deadataE",j, sep="_"))[D],
                             XREF=get(paste("deadataF",j+1, sep="_"))[C[b]],
                             YREF=get(paste("deadataF",j+1, sep="_"))[D],
                             ORIENTATION=Orientation,RTS=RTS))
    
  }
  
  if (Orientation=="in") {
    
    assign(paste("d_dea",j,j+1,sep="_"), 
           get(paste("dea",j,j+1,sep="_"))$eff)
    
  } else {
    
    assign(paste("d_dea",j,j+1,sep="_"), 
           1/get(paste("dea",j,j+1,sep="_"))$eff)
    
  }} 

# calculation of efficiency scores in j to technology in j (with j=1,...,periods)

for( j in 1:periods) {
  
  if (length(get(paste("eliminate",j,sep="_")))>0) {
    
    assign(paste("dea",j,j,sep="_"), 
           Benchmarking::dea(get(paste("deadataE",j, sep="_"))[C[b]], 
                             get(paste("deadataE",j, sep="_"))[D],
                             XREF=get(paste("deadataF",j, sep="_"))[C[b]], 
                             YREF=get(paste("deadataF",j, sep="_"))[D],
                             ORIENTATION=Orientation,RTS=RTS))
    
  } else {
    
    assign(paste("dea",j,j,sep="_"), 
           Benchmarking::dea(get(paste("deadataE",j, sep="_"))[C[b]], 
                             get(paste("deadataE",j, sep="_"))[D],
                             XREF=get(paste("deadataF",j, sep="_"))[C[b]],
                             YREF=get(paste("deadataF",j, sep="_"))[D],
                             ORIENTATION=Orientation,RTS=RTS))
    
  }
  
  if (Orientation=="in") {
    
    assign(paste("d_dea",j,j,sep="_"), 
           get(paste("dea",j,j,sep="_"))$eff)
    
  } else {
    
    assign(paste("d_dea",j,j,sep="_"), 
           1/get(paste("dea",j,j,sep="_"))$eff)
    
  }} 

## calculation of efficiency scores in j+1 to technology in j (with j=1,...,periods)

for( j in 1:(periods-1)) {
  
  if (length(get(paste("eliminate",j,sep="_")))>0) {
    
    assign(paste("dea",j+1,j,sep="_"), 
           Benchmarking::dea(get(paste("deadataE",j+1, sep="_"))[C[b]], 
                             get(paste("deadataE",j+1, sep="_"))[D],
                             XREF=get(paste("deadataF",j, sep="_"))[C[b]],
                             YREF=get(paste("deadataF",j, sep="_"))[D],
                             ORIENTATION=Orientation,RTS=RTS))
    
  } else {
    
    assign(paste("dea",j+1,j,sep="_"), 
           Benchmarking::dea(get(paste("deadataE",j+1, sep="_"))[C[b]], 
                             get(paste("deadataE",j+1, sep="_"))[D],
                             XREF=get(paste("deadataF",j, sep="_"))[C[b]],
                             YREF=get(paste("deadataF",j, sep="_"))[D],
                             ORIENTATION=Orientation,RTS=RTS))
    
  }
  
  if (Orientation=="in") {
    
    assign(paste("d_dea",j+1,j,sep="_"), 
           get(paste("dea",j+1,j,sep="_"))$eff)
    
  } else {
    
    assign(paste("d_dea",j+1,j,sep="_"), 
           1/get(paste("dea",j+1,j,sep="_"))$eff)
    
  }} 

# Catch-up

for (j in 1:(periods-1)) {
  
  assign(paste("catch_up",j,j+1,sep="_"), 
         get(paste("d_dea",j+1,j+1,sep="_"))/get(paste("d_dea",j,j,sep="_")))
  
  # Frontier-Shift
  
  assign(paste("frontier_shift",j,j+1,sep="_"), 
         sqrt((get(paste("d_dea",j,j,sep="_"))/get(paste("d_dea",j,j+1,sep="_")))*
                (get(paste("d_dea",j+1,j,sep="_"))/get(paste("d_dea",j+1,j+1,sep="_")))))
  
  # Malmquist
  
  assign(paste("malmquist",j,j+1,sep="_"), 
         get(paste("catch_up",j,j+1,sep="_"))
         *get(paste("frontier_shift",j,j+1,sep="_")))
  
}

##################################################################################################
##################################################################################################


##################################################################################################
#################################       Data output              #################################
##################################################################################################
setwd("..")
setwd("Ergebnisse")

### collect output 
# overall summary

n_obs <- (dim(get(paste("deadataE"))[C[b]])[1])/2
n_outlier <- length(get(paste("eliminate")))
Min <- round(min(frontier_shift_1_2),digits=5)
Mean <- round(mean(frontier_shift_1_2), digits=5)
Max <- round(max(frontier_shift_1_2), digits=5)
SD <- round(sd(frontier_shift_1_2), digits=5)

dmu_output<-as.matrix(deadataE_1[,"DMU"])

for (j in 1:(periods-1)) {
  dmu_output <- cbind(dmu_output, get(paste("catch_up",j,j+1,sep="_")))
  dmu_output <- cbind(dmu_output, get(paste("frontier_shift",j,j+1,sep="_")))
  dmu_output <- cbind(dmu_output, get(paste("malmquist",j,j+1,sep="_")))
}

coln<-c("DMU")

for (j in 1:(periods-1)) {
  coln <- c(coln, paste("catch_up",j,j+1,sep="_"))
  coln <- c(coln, paste("frontier_shift",j,j+1,sep="_"))
  coln <- c(coln, paste("malmquist",j,j+1,sep="_"))}

colnames(dmu_output)<-coln

c1<-c("Dateiname", "sheet", "RTS",
      "Orientation", "Perioden", "Input", "Output", "Zeitindex")

c2<-c(source, sheet, RTS, 
      Orientation, periods, toString(C[b]), toString(D), Time_index)

c3<-cbind(c1,c2)


d1<-c("obs","outlier","Min","Mean","Max","SD")

d2<-c(n_obs,n_outlier,Min,Mean,Max,SD)
d3<-cbind(d1,d2)


### bnr, nnr, name exportieren
mat_bnr <- as.matrix(deadataE_2$bnr)
mat_nnr <- as.matrix(deadataE_2$nnr)

dmu_output <- cbind(dmu_output,mat_bnr,mat_nnr)
colnames(dmu_output)<-c(coln,"bnr","nnr")


d_dmu_output<-as.matrix(deadataE_1[,"DMU"])
for (j in 1:periods) {
  d_dmu_output<-cbind(d_dmu_output, get(paste("d_dea",j,j,sep="_")))
}
dmu_output <- cbind(dmu_output,n_obs,n_outlier)


### Output in excel
writeWorksheetToFile(paste("PFG4_Malmquist_DEA_VNB_RP23.xlsx"), 
                     data=dmu_output, sheet=paste("Out",case[a],C[b],Modell[g],sep="_"), 
                     startRow=1, startCol=1, header=T, clearSheets=T)

writeWorksheetToFile(paste(format(Sys.time(), "%Y-%m-%d"), "_PFG4_Malmquist_DEA_VNB_RP23",".xlsx",sep=""), 
                     data=dmu_output, sheet=paste("Out",case[a],C[b],Modell[g],sep="_"), 
                     startRow=1, startCol=1, header=T, clearSheets=T)



}
}
}

