SPECIALS = c('row.names', 'rownames', 'names')

#' Look up values in dictionary [from the 'expss' package version 0.10.7]
#' 
#' \code{vlookup}/\code{vlookup_df} function is inspired by VLOOKUP spreadsheet
#' function. It looks for a \code{lookup_value} in the \code{lookup_column} of
#' the \code{dict}, and then returns values in the same rows from
#' \code{result_column}. \code{add_columns} inspired by MATCH FILES (Add
#' variables...) from SPSS Statistics. It works similar to SQL left join but
#' number of cases in the left part always remain the same. If there are
#' duplicated keys in the \code{dict} then error will be raised by default.
#' \code{.add_columns} is the same function for default dataset.
#' 
#' @param lookup_value Vector of looked up values
#' @param dict data.frame/matrix. Dictionary. Can be vector for
#'   \code{vlookup}/\code{vlookup_df}.
#' @param result_column numeric or character. Resulting columns of \code{dict}.
#'   There are special values: 'row.names', 'rownames', 'names'. If
#'   \code{result_column} equals to one of these special values and \code{dict}
#'   is matrix/data.frame then row names of \code{dict} will be returned. If
#'   \code{dict} is vector then names of vector will be returned. For
#'   \code{vlookup_df} default \code{result_column} is NULL and result will be
#'   entire rows. For \code{vlookup} default \code{result_column} is 2 - for
#'   frequent case of dictionary with keys in the first column and results in
#'   the second column.
#' @param lookup_column Column of \code{dict} in which lookup value will be
#'   searched. By default it is the first column of the \code{dict}. There are
#'   special values: 'row.names', 'rownames', 'names'. If lookup_column equals
#'   to one of these special values and \code{dict} is matrix/data.frame then
#'   values will be searched in the row names of \code{dict}. If \code{dict} is
#'   vector then values will be searched in names of the \code{dict}.
#' @param data data.frame to be joined with \code{dict}.
#' @param by character vector or NULL(default) or 1. Names of common variables
#'   in the \code{data} and \code{dict} by which we will attach \code{dict} to
#'   \code{data}. If it is NULL then common names will be used. If it is equals
#'   to 1 then we will use the first column from both dataframes. To add columns
#'   by different variables on \code{data} and \code{dict} use a named vector.
#'   For example, \code{by = c("a" = "b")} will match data.a to dict.b.
#' @param ignore_duplicates logical Should we ignore duplicates in the \code{by}
#'   variables in the \code{dict}? If it is TRUE than first occurrence of duplicated
#'   key will be used.
#' @return \code{vlookup} always return vector, \code{vlookup_df} always returns
#'   data.frame. \code{row.names} in result of \code{vlookup_df} are not
#'   preserved.
#' 
#' @author Gregory Demin and Sebastian Jeworutzki ('expss' package version 0.10.7)
#' 
#'
#' 
#' @export
vlookup <- function(lookup_value, dict, result_column = 2, lookup_column = 1){
    stopif(length(result_column)>1, "result_column shoud be vector of length 1.")
    vlookup_internal(lookup_value = lookup_value, 
                     dict = dict, 
                     result_column = result_column, 
                     lookup_column = lookup_column, df = FALSE)
}


#' @export
#' @rdname vlookup
vlookup_df <- function(lookup_value, dict, result_column = NULL, lookup_column = 1) {
    vlookup_internal(lookup_value = lookup_value, 
                     dict = dict, 
                     result_column = result_column, 
                     lookup_column = lookup_column, df = TRUE)
}


#' @export
#' @rdname vlookup
add_columns <- function(data, dict, 
                       by = NULL, 
                       ignore_duplicates = FALSE
){
    UseMethod("add_columns")
    
}

#' @importFrom data.table is.data.table :=
#' @export
add_columns.data.frame <- function(data, dict, 
                       by = NULL, 
                       ignore_duplicates = FALSE
){
    if(!is.data.frame(data)) data = as.sheet(data)
    if(!is.data.frame(dict)) dict = as.sheet(dict)
    # ..by_data = NULL
    # ..by = NULL
    colnames_data = colnames(data)
    colnames_dict = colnames(dict)
    if(is.null(by)){
        by = intersect(colnames_data, colnames_dict)
        stopif(length(by)==0, "'add_columns' - no common column names between 'data' and 'dict'.")
        message(paste0("Adding columns by ", paste(dQuote(by), collapse = ", ")))
    }
    if(identical(by, 1) || identical(by, 1L)){
        lookup_value = data[[1]]    
        lookup_column = dict[[1]]   
        col_nums_dict = 1
    } else {
        stopif(!is.character(by), "'add_columns' - 'by' should be character, NULL or 1.")
        if(!is.null(names(by))){
            by_data = names(by)
            by_data[by_data==""] = by[by_data==""]
        } else {
            by_data = by
        }
        stop_if_columns_not_exist(colnames_data, by_data)
        stop_if_columns_not_exist(colnames_dict, by)
        if(length(by)>1){
            stopif(anyDuplicated(by), "'add_columns'- duplicated variable names in 'by': ", 
                   paste(dQuote(by[duplicated(by)]), collapse = ", "))
            stopif(anyDuplicated(by_data), "'add_columns'- duplicated variable names in 'by': ", 
                   paste(dQuote(by_data[duplicated(by_data)]), collapse = ", "))
            if(data.table::is.data.table(data)){
                lookup_value = data[ , by_data, with = FALSE]     
            } else {
                lookup_value = data[ , by_data]
            }
            if(data.table::is.data.table(dict)){
                lookup_column = dict[ , by, with = FALSE]                     
            } else {
                lookup_column = dict[ , by] 
            }
            lookup_value = do.call("paste", c(unlab(lookup_value), sep = "\r"))
            lookup_column = do.call("paste", c(unlab(lookup_column), sep = "\r"))
        } else {
            lookup_value = data[[by_data]]    
            lookup_column = dict[[by]]  
        }
        col_nums_dict = match(by, colnames_dict)
    }
    if(!ignore_duplicates){
        stopif(anyDuplicated(lookup_column), 
               "'add_columns' - duplicated values in 'by' columns in 'dict'")
    }
    # calculate index
    ind = fast_match(lookup_value, lookup_column, NA_incomparable = FALSE)
    if(data.table::is.data.table(dict)){
        res = subset_dataframe(dict[,-col_nums_dict, with = FALSE], ind, drop = FALSE)    
    } else {
        res = subset_dataframe(dict[,-col_nums_dict, drop = FALSE], ind, drop = FALSE)    
    }
    
    # make unique names in res
    colnames_res = colnames(res)
    dupl = intersect(colnames_res, colnames_data)
    if(length(dupl)>0){
        warning(
            paste0("'add_columns' - some names in 'dict' duplicate names in 'data': ",
                   paste(dupl, collapse = ", ")
                   )
            )
        all_names = make.unique(c(colnames_data, colnames_res), sep = "_")
        # we change only dictionary names 
        colnames(res) = all_names[-seq_along(colnames_data)]
    }
    if(data.table::is.data.table(data)){
        data[, colnames(res):=res]    
    } else {
        data[, colnames(res)] = res    
    }
    
    data

}


#' @importFrom huxtable add_columns
#' @export
add_columns.huxtable <- function(...){
    huxtable::add_columns(...)
}

#' @export
#' @rdname vlookup
.add_columns <- function (dict, 
                         by = NULL, 
                         ignore_duplicates = FALSE) {
    reference = suppressMessages(default_dataset())
    data = ref(reference)
    data = add_columns(data, 
                       dict = dict, 
                       by = by, 
                       ignore_duplicates = ignore_duplicates)
    ref(reference) = data
    invisible(data)
}


#' This is the vlookup_internal function
#' 
#' Internal vlookup function
#' 
#' @param df logical vector. If TRUE, it's a data.frame (default). If FALSE, it's
#'   not a data.frame.
#' @param lookup_value Vector of looked up values
#' @param dict data.frame/matrix. Dictionary. Can be vector for
#'   vlookup/vlookup_df.
#' @param result_column numeric or character. Resulting columns of dict.
#'   There are special values: 'row.names', 'rownames', 'names'. If
#'   result_column equals to one of these special values and dict
#'   is matrix/data.frame then row names of dict will be returned. If
#'   dict is vector then names of vector will be returned. For
#'   vlookup_df default result_column is NULL and result will be
#'   entire rows. For vlookup default result_column is 2 - for
#'   frequent case of dictionary with keys in the first column and results in
#'   the second column.
#' @param lookup_column Column of dict in which lookup value will be
#'   searched. By default it is the first column of the dict. There are
#'   special values: 'row.names', 'rownames', 'names'. If lookup_column equals
#'   to one of these special values and dict is matrix/data.frame then
#'   values will be searched in the row names of dict. If dict is
#'   vector then values will be searched in names of the dict.
#' @param data data.frame to be joined with dict.
#' @param by character vector or NULL(default) or 1. Names of common variables
#'   in the data and dict by which we will attach dict to
#'   data. If it is NULL then common names will be used. If it is equals
#'   to 1 then we will use the first column from both dataframes. To add columns
#'   by different variables on data and dict use a named vector.
#'   For example, by = c("a" = "b") will match data.a to dict.b.
#' @param ignore_duplicates logical Should we ignore duplicates in the by
#'   variables in the dict? If it is TRUE than first occurrence of duplicated
#'   key will be used.
#' @return vlookup_internal is an internal function.
#'   
#' @author Gregory Demin and Sebastian Jeworutzki ('expss' package version 0.10.7)
#'  
#' @noRd
#' @keywords internal
#' @export
vlookup_internal <- function(lookup_value, 
                            dict, 
                            result_column = NULL, 
                            lookup_column = 1, 
                            df = TRUE) {
    stopif(is.list(lookup_value) || NCOL(lookup_value)!=1,
           "'vlookup' - incorrect 'lookup_value'. 'lookup_value' should be vector but its class is ", 
           paste(class(lookup_value), collapse = ", "))
    # validate lookup_column
    stopif(length(lookup_column)!=1L,"'vlookup' - 'lookup_column' shoud be vector of length 1.")
    stopif(!is.numeric(lookup_column) && !is.character(lookup_column),
           "'vlookup' - 'lookup_column' shoud be character or numeric.")
    stopif(is.numeric(lookup_column) && max(lookup_column,na.rm = TRUE)>NCOL(dict),
           "'vlookup' - 'lookup_column' is greater than number of columns in the dict.")
    stopif(is.numeric(lookup_column) && any(lookup_column <= 0),
           "'vlookup' - 'lookup_column' should be positive.")
    stopif(is.character(lookup_column) && (is.data.frame(dict) || is.matrix(dict)) && 
               !all(setdiff(lookup_column, SPECIALS) %in% colnames(dict)),
           "'vlookup' - 'lookup_column' doesn't exists in column names of the dict.")
    
    
    # validate result_column
    stopif(!is.null(result_column) && any(is.na(result_column)), "NA's in result_column")
    
    stopif(is.numeric(result_column) && max(result_column,na.rm = TRUE)>NCOL(dict),
           "result_column is greater than number of columns in the dict.")
    stopif(is.character(result_column) && (is.data.frame(dict) || is.matrix(dict)) && 
               !all(setdiff(result_column, SPECIALS) %in% colnames(dict)),
           "some names in result_column doesn't exists in column names of the dict.")
    
    if(is.matrix(dict) || is.data.frame(dict)){
        dict_was_vector = FALSE    
    } else {
        dict_was_vector = TRUE    
    }    
    if(any(SPECIALS %in% result_column) || any(SPECIALS %in% lookup_column)){
        if(is.matrix(dict) || is.data.frame(dict)){
            curr_rowlabs = rownames(dict)
            
        } else {
            curr_rowlabs = names(dict)
            
        }
    }
    if(!is.data.frame(dict)) dict = as.sheet(dict)
    if(any(SPECIALS %in% result_column) || any(SPECIALS %in% lookup_column)){
        dict[["...RRRLLL..."]] = curr_rowlabs
        if(any(SPECIALS %in% result_column)) result_column[result_column %in% SPECIALS] = "...RRRLLL..."
        if(any(SPECIALS %in% lookup_column)) lookup_column[lookup_column %in% SPECIALS] = "...RRRLLL..."
    }
    # calculate index
    ind = fast_match(lookup_value, dict[[lookup_column]], NA_incomparable = FALSE)
    ### calculate result
    if(df){
        if (is.null(result_column)){
            result = subset_dataframe(dict, ind, drop = FALSE)
        } else {
            
            result = subset_dataframe(dict, ind, drop = FALSE)[, result_column, drop = FALSE]
            
        }
        colnames(result)[colnames(result) %in% "...RRRLLL..."] = "row_names"
        # if(dict_was_vector) rownames(result) = NULL
    } else {
        if (is.null(result_column)){
            result = ind
        } else {
            result = dict[[result_column]][ind]
        }
           
    }
    result
}


## stop if condition with message
stopif <- function(cond,...){
    if (cond) {
        stop(do.call(paste0,c(list(...))),call. = FALSE)
    }
    invisible()
}


subset_dataframe <- function(x, j, drop = TRUE){
    if(NCOL(x)==1 && drop){
        return(x[[1]][j])    
    }
    res = lapply(x, universal_subset, j, drop = drop)
    class(res) = class(x)
    if(NCOL(x)>0){
        attr(res, "row.names") = seq_len(NROW(res[[1]]))
    }
    res
}


fast_match <- function(x, table, nomatch = NA_integer_, NA_incomparable = FALSE){
    if(is.character(x) && is.character(table)){
        ind = data.table::chmatch(x, table, nomatch = nomatch) 
        if(NA_incomparable) {
            ind[is.na(x)] = nomatch
        }
    } else {
        if(NA_incomparable) {
            ind = match(x, table, 
                        nomatch = nomatch, 
                        incomparables = NA)
        } else {
            ind = match(x, table,
                        nomatch = nomatch, 
                        incomparables = NULL)
        }    
    }
    ind
}


#' @importFrom data.table is.data.table
universal_subset <- function(data, index, drop = TRUE){
    if(is.matrix(data)){
        data =  data[index, , drop = drop]
    } else if(is.data.frame(data)){
        if(data.table::is.data.table(data)){
            data = data[index, ]
        } else {
            data = subset_dataframe(data, index, drop = drop)
        }
    } else {
        data =  data[index]
    }
    data
}