# Source code for pyDEA.core.utils.dea_utils

''' This module contains various utility functions and constants.

    Attributes:

        FILE_TYPES (list of tuple of str, str): list of supported
            input file parameters.
        SOLUTION_XLSX_FILE (list of tuple of str, str): list of
            supported solution file formats (xlsx).
        SOLUTION_XLS_FILE (list of tuple of str, str): list of
            supported solution file formats (xls).

            Note:
                This list does not contain csv format since for solution
                files we create a folder with all csv files inside.
        TEXT_FOR_PANEL (str): text displayed in the label on the data tab before
            path to input data file.
        TEXT_FOR_FILE_LBL (str): text displayed in the label on the solution tab
            before path to input data file.
        ZERO_TOLERANCE (double): is used to check if a value is non-zero. All
            values greater than ZERO_TOLERANCE are considered to be strictly
            positive.
        VALID_COEFF (int): constant used to indicate valid data entry.
        WARNING_COEFF (int): constant used to indicate valid data entry that
            is zero.
        NOT_VALID_COEFF (int): constant used to indicate invalid data entry.
        EMPTY_COEFF (int): constant used to indicate empty data entry.
        CELL_DESTROY (int): constant used to indicate that cell of a table was
            destroyed.
        CHANGE_CATEGORY_NAME (int): constant used to indicate that name of the
            category was changed.
        INPUT_OBSERVER (int): constant used to indicate the observer for input
            categories.
        OUTPUT_OBSERVER (int): constant used to indicate the observer for
            output categories.
        XPAD_VALUE (int): horizontal padding.
        YPAD_VALUE (int): vertical padding.
        bg_color (hex): background colour for all widgets.
        TMP_FOLDER (str): name of the folder where all pickled files will
            be stored and then removed.
'''

from tkinter import StringVar
from tkinter import ALL
import os
import pkg_resources
import logging
from logging.config import fileConfig

# Package name and the logging configuration resource looked up inside it.
PACKAGE = 'pyDEA'
LOG_FILE = 'logging_config.ini'

# File dialog filters: supported input formats and solution formats.
FILE_TYPES = [
    ('Excel (xls)', '*.xls'),
    ('Excel (xlsx)', '*.xlsx'),
    ('Text CSV', '*.csv'),
]
SOLUTION_XLSX_FILE = [('Excel (xlsx)', '*.xlsx')]
SOLUTION_XLS_FILE = [('Excel (xls)', '*.xls')]

# Label prefixes shown before the path to the input data file.
TEXT_FOR_PANEL = 'File: '
TEXT_FOR_FILE_LBL = 'Data from file: '

# Values greater than this threshold are considered strictly positive.
ZERO_TOLERANCE = 1e-10

# Codes describing the state of a data entry or of a table cell.
VALID_COEFF = 1
WARNING_COEFF = 0
NOT_VALID_COEFF = -1
EMPTY_COEFF = -2
CELL_DESTROY = -3
CHANGE_CATEGORY_NAME = -4

# Identifiers of observers for input and output categories.
INPUT_OBSERVER = 1
OUTPUT_OBSERVER = 2

# Horizontal and vertical widget padding.
XPAD_VALUE = 10
YPAD_VALUE = 5

# Background colour for all widgets.
bg_color = '#E8E9FA'

# Folder where pickled files are stored temporarily.
TMP_FOLDER = 'tmp'


class ObserverStringVar(StringVar):
    ''' StringVar that additionally carries two lists for storing input
        and output categories.

        Attributes:
            input_categories (list of str): list with input categories,
                empty right after construction.
            output_categories (list of str): list with output categories,
                empty right after construction.
    '''
    def __init__(self, *args, **kw):
        # All arguments are forwarded untouched to StringVar.
        super().__init__(*args, **kw)
        self.input_categories = []
        self.output_categories = []


def get_logger():
    ''' Configures logging from the ini-file shipped with the package and
        returns the root logger.

        The configuration file LOG_FILE is located inside package PACKAGE
        via pkg_resources and then passed to logging.config.fileConfig.

        Returns:
            logger: root logger obtained after the configuration was applied.
    '''
    config_path = pkg_resources.resource_filename(PACKAGE, LOG_FILE)
    fileConfig(config_path)
    root_logger = logging.getLogger()
    return root_logger


def change_to_unique_name_if_needed(file_name):
    ''' Returns a file name that does not clash with an existing file.

        If nothing exists at the given path, the path is returned unchanged.
        Otherwise a counter suffix (_1, _2, ...) is inserted before the
        file extension and incremented until an unused name is found.

        Args:
            file_name (str): file name.

        Returns:
            str: given file name if it is not taken, otherwise the first
                free name of the form <base>_<counter><extension>.
    '''
    if not os.path.exists(file_name):
        return file_name

    stem, suffix = os.path.splitext(file_name)
    counter = 1
    candidate = '{0}_{1}{2}'.format(stem, counter, suffix)
    while os.path.exists(candidate):
        counter += 1
        candidate = '{0}_{1}{2}'.format(stem, counter, suffix)
    return candidate


def clean_up_pickled_files():
    ''' Deletes p-files from the temporary folder (if that folder exists)
        and mps-files from the current working directory.
    '''
    # pickled files live in TMP_FOLDER, which may not have been created
    if os.path.exists(TMP_FOLDER):
        for entry in os.listdir(TMP_FOLDER):
            if entry.endswith('.p'):
                os.remove(os.path.join(TMP_FOLDER, entry))

    # mps files, if any, are looked up in the current folder
    for entry in os.listdir('.'):
        if entry.endswith('.mps'):
            os.remove(entry)


def format_data(data):
    ''' Formats a number with 6 digits after the decimal point.

        Anything that float() cannot convert is returned as its string
        representation. This covers both strings that are not numbers
        (ValueError) and objects that float() does not accept at all,
        such as None or a list (TypeError).

        Args:
            data (double): data that must be formatted.

        Returns:
            str: formatted data.

        Example:
            >>> format_data(1.222222222222222)
            '1.222222'
            >>> format_data('str')
            'str'
            >>> format_data(0.123456789)
            '0.123457'
            >>> format_data('0.05')
            '0.050000'
            >>> format_data(None)
            'None'
    '''
    try:
        float_data = float(data)
    except (TypeError, ValueError):
        # not a number: show the value as is instead of failing
        return str(data)
    return '%.6f' % float_data


def auto_name_if_needed(params, output_format, new_output_dir=''):
    ''' Returns the name of the solution file, generating it automatically
        when parameter OUTPUT_FILE is blank or set to auto.

        The automatic name is built from the base name of DATA_FILE
        (without extension) followed by '_result.' and the output format.
        It is placed in new_output_dir and made unique with
        change_to_unique_name_if_needed.

        Args:
            params (Parameters): parameters.
            output_format (str): output format of solution file that
                should be used. Allowed values: xls, xlsx, csv.
            new_output_dir (str, optional): directory where solution must be
                stored. It should be specified if it is different from current
                folder. Defaults to empty string.

        Returns:
            str: value of OUTPUT_FILE if it is neither blank nor auto,
                automatically generated name otherwise.

        Raises:
            ValueError: if an automatic name is required and output_format
                is not 'xls', 'xlsx' or 'csv'.
    '''
    output_name = params.get_parameter_value('OUTPUT_FILE')
    if output_name.lower() != 'auto' and output_name.strip() != '':
        # explicit name was given, use it as is
        return output_name

    data_file = params.get_parameter_value('DATA_FILE')
    stem = os.path.splitext(os.path.basename(data_file))[0]
    if output_format not in ('xls', 'xlsx', 'csv'):
        raise ValueError(
            '{0} is not supported output format'.format(output_format))
    auto_name = os.path.join(new_output_dir,
                             stem + '_result.' + output_format)
    return change_to_unique_name_if_needed(auto_name)


def calculate_nb_pages(nb_data_rows, nb_table_rows):
    ''' Calculates number of pages given number of data rows and number of rows
        in the table.

        Note:
            first row is reserved for categories, hence every page shows
            nb_table_rows - 1 data rows.

        Args:
            nb_data_rows (int): number of data rows.

                Warning:
                    It must be a non-negative number. Otherwise the function
                    will return incorrect value.

            nb_table_rows (int): number of rows in the table.

                Warning:
                    It must be a non-negative number. Otherwise the function
                    will return incorrect value.

        Returns:
            int: number of pages. At least 1 when the table can display
                data. 0 when the table has no rows, or only the row
                reserved for categories, so that no data can be displayed.

        Example:
            >>> calculate_nb_pages(100, 10)
            12
            >>> calculate_nb_pages(25, 20)
            2
            >>> calculate_nb_pages(0, 20)
            1
            >>> calculate_nb_pages(10, 25)
            1
            >>> calculate_nb_pages(10, 1)
            0

    '''
    if not nb_table_rows:
        return 0
    rows_per_page = nb_table_rows - 1
    if rows_per_page == 0:
        # only the categories row fits: there is no room for data and
        # dividing by the page size would fail
        return 0
    # divmod keeps the computation exact for arbitrarily large integers
    full_pages, leftover = divmod(nb_data_rows, rows_per_page)
    nb_pages = int(full_pages)
    if leftover != 0:
        # partially filled last page
        nb_pages += 1
    return max(nb_pages, 1)


def calculate_start_row_index(curr_page, nb_table_rows):
    ''' Calculates index of the first data row displayed on the given page.

        Note:
            The first row of the table is reserved for displaying categories,
            so each page holds nb_table_rows - 1 data rows.

        Args:
            curr_page (int): current page number. Pages start from 1.
                Value 0 is accepted and mapped to index 0.
            nb_table_rows (int): number of rows in the table.

        Returns:
            int: data row index.
    '''
    if curr_page != 0:
        rows_per_page = nb_table_rows - 1
        return (curr_page - 1) * rows_per_page
    return 0


def validate_category_name(name, category_index, current_categories):
    ''' Validates a category name. A name is rejected if it can be converted
        to a number, if it contains a semicolon, or if the same name is
        present in the list of current categories at a different position.

        Args:
            name (str): category name.
            category_index (int): index of this category in the list of
                current categories.
            current_categories (list of str): list of current categories.

        Returns:
            str: given category name if this name is valid, empty
                string otherwise.

        Example:
            >>> validate_category_name('I1n', 0, ['I1n', 'I2', 'O2'])
            'I1n'
            >>> validate_category_name('I1n;', 0, ['I1n;', 'I2', 'O2'])
            ''
            >>> validate_category_name('1.2', 0, ['1.2', 'I2', 'O2'])
            ''
            >>> validate_category_name('I1n', 0, ['I1n', 'I1n', 'O2'])
            ''
    '''
    try:
        float(name)
    except ValueError:
        pass  # not a number, keep validating
    else:
        return ''

    if ';' in name:
        return ''

    duplicated = any(
        other == name
        for position, other in enumerate(current_categories)
        if position != category_index)
    return '' if duplicated else name


def on_canvas_resize(canvas):
    ''' This function updates scroll region of the canvas.
        It should be called on canvas resize.

        The scroll region spans from the origin to the right and bottom
        edges of the bounding box of all canvas items. An extent is only
        used when it exceeds the current canvas size in that direction,
        otherwise it is set to 0. If the canvas has no bounding box
        (bbox returns None, e.g. for a canvas without items), the scroll
        region is reset to (0, 0, 0, 0).

        Args:
            canvas (Canvas): canvas
    '''
    canvas.update_idletasks()
    bounding_box = canvas.bbox(ALL)
    if bounding_box is None:
        # nothing is drawn on the canvas, so there is nothing to scroll
        canvas['scrollregion'] = (0, 0, 0, 0)
        return

    right, bottom = bounding_box[2], bounding_box[3]
    region_width = right if right > canvas.winfo_width() else 0
    region_height = bottom if bottom > canvas.winfo_height() else 0
    canvas['scrollregion'] = (0, 0, region_width, region_height)


def center_window(widget, width=None, height=None):
    ''' Places widget in the middle of the screen.

        The widget is withdrawn while its geometry is being changed and
        shown again afterwards.

        Args:
            widget (Tk object): widget that needs to be centred.
            width (int, optional): width of the widget that should be used.
                If not specified, requested widget width is used.
            height (int, optional): height of the widget that should be used.
                If not specified, requested widget height is used.
    '''
    widget.withdraw()
    widget.update_idletasks()

    if width is None:
        width = widget.winfo_reqwidth()
    if height is None:
        height = widget.winfo_reqheight()

    screen_width = widget.winfo_screenwidth()
    screen_height = widget.winfo_screenheight()

    # offsets of the top left corner; %d drops the fractional part
    left = (screen_width - width) / 2
    top = (screen_height - height) / 2
    new_geometry = '%dx%d+%d+%d' % (width, height, left, top)
    widget.geometry(new_geometry)
    widget.deiconify()

def create_params_str(params):
    ''' Builds a short description from values of parameters ORIENTATION
        and RETURN_TO_SCALE.

        Args:
            params (Parameters): parameters.

        Returns:
            str: string of the form
                '<ORIENTATION> orientation, <RETURN_TO_SCALE>'.
    '''
    orientation = params.get_parameter_value('ORIENTATION')
    return_to_scale = params.get_parameter_value('RETURN_TO_SCALE')
    return '{0} orientation, {1}'.format(orientation, return_to_scale)


def is_valid_coeff(coeff):
    ''' Checks if given coefficient is valid. Valid coefficient is positive
        finite floating point or integer number.

        Values that cannot be converted to a float (including None and
        other non-numeric objects), as well as NaN and infinite values,
        are reported as invalid.

        Args:
            coeff (double): data coefficient.

        Returns:
            int: VALID_COEFF (1), if coefficient is valid,
                WARNING_COEFF (0), if coefficient is zero,
                NOT_VALID_COEFF (-1), if coefficient is invalid.
    '''
    try:
        value = float(coeff)
    except (TypeError, ValueError, OverflowError):
        # TypeError: float() does not accept this object (e.g. None),
        # OverflowError: integer too large to be represented as float
        return NOT_VALID_COEFF

    # float() accepts 'nan' and 'inf'. NaN is the only value that is not
    # equal to itself. Neither is a usable data coefficient.
    if value != value or value in (float('inf'), float('-inf')):
        return NOT_VALID_COEFF
    if value < 0:
        return NOT_VALID_COEFF
    if value == 0:
        return WARNING_COEFF
    return VALID_COEFF


def is_efficient(efficiency_score, lambda_variable):
    ''' Decides if DMU with given efficiency score and value of
        lambda variable is efficient.

        DMU is reported as efficient if its efficiency score is equal to 1
        or greater than 1, or if its lambda variable is greater than
        ZERO_TOLERANCE.

        Args:
            efficiency_score (double): efficiency score.
            lambda_variable (double): value of lambda variable corresponding
                to DMU under consideration.

        Returns:
            bool: True if DMU is efficient, False otherwise.

        Example:
            >>> is_efficient(1, 1)
            True
            >>> is_efficient(0.5, 0)
            False
            >>> is_efficient(0.9999999, 1)
            True
            >>> is_efficient(1.0000001, 1)
            True

    '''
    # lambda variable alone is not the ultimate indication of efficiency,
    # which is why the score is inspected as well
    return bool(efficiency_score == 1 or
                lambda_variable > ZERO_TOLERANCE or
                efficiency_score > 1)


def check_input_and_output_categories(input_data):
    ''' Makes sure that both input and output categories are present.

        Args:
            input_data (InputData): object that stores all input data. Its
                attributes input_categories and output_categories are
                inspected.

        Raises:
            ValueError: if input or output categories are empty.
    '''
    both_present = (len(input_data.input_categories) != 0 and
                    len(input_data.output_categories) != 0)
    if not both_present:
        raise ValueError('Both input and output categories must be specified')


def check_categories(categories_to_ckeck, categories, message=''):
    ''' Verifies that every given category is present in categories list.
        The check stops at the first category that is missing.

        Args:
            categories_to_ckeck (list of str): list of categories that must
                be checked.
            categories (list of str): list of current categories.
            message (str, optional): message that must be shown if ValueError
                is raised. If it is empty, a default message that names the
                missing category is used.

        Raises:
            ValueError: if at least one of the categories is not present in
                the list of current categories.

        Example:
            >>> check_categories(['I1', 'O2'], ['I1', 'O1', 'O2'])
            >>> check_categories(['I1', 'I2'], ['I1', 'O1', 'O2'])
            Traceback (most recent call last):
                ...
            ValueError: Category <I2> is not present in categories: ...
    '''
    for candidate in categories_to_ckeck:
        if candidate in categories:
            continue
        error_text = message or (
            'Category <{0}> is not present in categories: {1}'.format(
                candidate, categories))
        raise ValueError(error_text)


def parse_price_ratio(elem, value, constraint, categories, bounds):
    ''' Parses one side of a price ratio constraint and stores the result
        in bounds. This function is internal utility function that is used
        for parsing weight restrictions in parse_constraint().

        The ratio must consist of exactly two category names separated
        by /. Surrounding spaces of both names are ignored. The parsed
        value is stored as float under the key (numerator, denominator).

        Args:
            elem (str): string that describes the price ratio side of the
                constraint, e.g. 'I1/I2'.
            value (str): string that describes the numeric side of the
                constraint.
            constraint (str): string that describes entire constraint,
                it is used in error messages only.
            categories (set of str): set of current categories.
            bounds (dict of tuple of str, str to double or empty dictionary):
                dictionary where parsed constraint will be written.

        Raises:
            ValueError: if constraint cannot be parsed.

        Example:
            >>> bounds = dict()
            >>> s = set(['I1', 'I2', 'O1', 'O2'])
            >>> parse_price_ratio('I1/I2', '5', 'I1/I2 >= 5', s, bounds)
            >>> bounds
            {('I1', 'I2'): 5.0}
    '''
    parts = [part.strip() for part in elem.split('/')]
    if len(parts) != 2:
        raise ValueError('Cannot parse constraint: {0}'.format(constraint))

    numerator, denominator = parts
    if not (numerator in categories and denominator in categories):
        raise ValueError('Incorrect constraint, category does not'
                         ' exist: {0}'.format(constraint))
    bounds[(numerator, denominator)] = float(value)


def parse_constraint(constraint, split_str, new_bounds_lb, new_bounds_ub,
                     categories):
    ''' Parses weight restriction constraint. This is internal utility function
        that is used in create_bounds().

        The constraint is split by split_str into two sides. The side that
        is "less or equal" (left of <=, right of >=) is inspected first:
        if it holds a price ratio or a category name, the other side is an
        upper bound for it. Otherwise, if the "greater or equal" side holds
        a price ratio or a category name, the other side is a lower bound.
        Price ratios are handled by parse_price_ratio().

        Args:
            constraint (str): constraint that needs to be parsed.
            split_str (str): '>=' or '<='.
            new_bounds_lb (dict): dictionary where parsed lower bounds will
                be written. Keys are category names or tuples of two
                category names (price ratio), values are floats.
            new_bounds_ub (dict): dictionary where parsed upper bounds will
                be written, same layout as new_bounds_lb.
            categories (set of str): set of current categories.

        Returns:
            bool: True if split_str was found in constraint, False otherwise.
                Nothing is written to dictionaries in the latter case.

        Raises:
            ValueError: if constraint contains neither <= nor >=, or if it
                cannot be parsed.

        Example:
            >>> new_bounds_lb = dict()
            >>> new_bounds_ub = dict()
            >>> s = set(['I1', 'I2', 'O1', 'O2'])
            >>> constr = 'I1 >= 5'
            >>> parse_constraint(constr, '>=', new_bounds_lb, new_bounds_ub, s)
            True
            >>> new_bounds_lb
            {'I1': 5.0}
            >>> new_bounds_ub
            {}
    '''
    # position of the side that is bounded from above by the other side
    if '<=' in constraint:
        smaller_pos = 0
    elif '>=' in constraint:
        smaller_pos = 1
    else:
        raise ValueError('Unexpected constraint type,'
                         ' supported types >= and <=')

    if split_str not in constraint:
        return False

    sides = [side.strip() for side in constraint.split(split_str)]
    if len(sides) != 2:
        raise ValueError('Cannot parse constraint: {0}'.format(constraint))

    smaller = sides[smaller_pos]
    larger = sides[1 - smaller_pos]
    if '/' in smaller:
        parse_price_ratio(smaller, larger, constraint, categories,
                          new_bounds_ub)
    elif '/' in larger:
        parse_price_ratio(larger, smaller, constraint, categories,
                          new_bounds_lb)
    elif smaller in categories:
        new_bounds_ub[smaller] = float(larger)
    elif larger in categories:
        new_bounds_lb[larger] = float(smaller)
    else:
        raise ValueError('Incorrect constraint, category does not'
                         ' exist: {0}'.format(constraint))
    return True


def create_bounds(constraints, categories):
    ''' Parses all constraints and merges lower and upper bounds into one
        dictionary.

        Every constraint is parsed with parse_constraint(), once for <= and
        once for >=. Afterwards lower and upper bounds that belong to the
        same key are combined into a tuple (lower bound, upper bound), where
        a missing bound is represented by None.

        Args:
            constraints (list of str): list of constraints to parse.
            categories (set of str): set of current categories.

        Returns:
            dict of str to tuple of double,
                double or dict of tuple of str,
                str to tuple of double, double:
                dictionary with parsed values of constraints.

        Raises:
            ValueError: if some of the constraints cannot be parsed.

        Warning:
            The result is expected to be non-empty, this is verified with
            assert.

        Example:
            >>> categories = set(['I1', 'I2', 'O1', 'O2'])
            >>> constraints = ['I1 <= 10', 'I1 >= 2', 'O1 >= 3', 'O2 <= 7']
            >>> create_bounds(constraints, categories)
            {'I1': (2.0, 10.0), 'O1': (3.0, None), 'O2': (None, 7.0)}
            >>> ratio_bounds = ['I1/I2 <= 10', 'I1/I2 >= 1',
            ...                 'O2/O1 >= 0.2', 'O1/O2 <= 0.5']
            >>> create_bounds(ratio_bounds, categories)
            {('I1', 'I2'): (1.0, 10.0), ('O2', 'O1'): (0.2, None),
             ('O1', 'O2'): (None, 0.5)}
    '''
    lower = dict()
    upper = dict()
    for constraint in constraints:
        found_le = parse_constraint(constraint, '<=', lower, upper,
                                    categories)
        found_ge = parse_constraint(constraint, '>=', lower, upper,
                                    categories)
        if not found_le and not found_ge:
            raise ValueError('Cannot parse constraint: {0}'.format(
                             constraint))

    # keys with a lower bound come first, followed by keys that have
    # an upper bound only
    all_keys = list(lower) + [key for key in upper if key not in lower]
    new_bounds = {key: (lower.get(key, None), upper.get(key, None))
                  for key in all_keys}

    assert new_bounds
    return new_bounds


def contraint_is_price_ratio_type(bounds_key):
    ''' Tells whether given key of bounds dictionary is a tuple with two
        elements. In the case of price ratio weight restrictions key of bounds
        dictionary will be a tuple with two elements.

        Args:
            bounds_key (tuple of str or str): key of bounds dictionary.

        Returns:
            bool: True if given key is a tuple with two elements,
                False otherwise.
    '''
    if not isinstance(bounds_key, tuple):
        return False
    return len(bounds_key) == 2


def get_price_ratio_categories(val):
    ''' Splits price ratio into its two categories.

        Args:
            val (str): string with price ratio categories separated by /.

        Returns:
            tuple of str, str: tuple with categories in numerator and
                denominator respectively, without surrounding spaces.

        Example:
            >>> get_price_ratio_categories('I1/ I2')
            ('I1', 'I2')
            >>> get_price_ratio_categories('   I1   /  I2  ')
            ('I1', 'I2')
            >>> get_price_ratio_categories('  name with spaces / I2')
            ('name with spaces', 'I2')

        Warning:
            If val does not contain / or has more than one /, the
            function will fail with assert.
    '''
    parts = val.split('/')
    assert len(parts) == 2
    numerator, denominator = parts
    return numerator.strip(), denominator.strip()


def find_category_name_in_restrictions(val):
    ''' Finds category name in the given constraint.

        The constraint is split by >= if it is present, by <= otherwise.
        If the right hand side is a number, the left hand side is taken as
        category name. Otherwise the right hand side is the category name.

        Args:
            val (str): string that contains one category name.

        Returns:
            tuple of str, int: category name and index. Index is 0 if
                category name is on the left hand side of the constraint,
                non-zero if category name is on the right hand side.
                In the latter case, index corresponds
                to the position of the category name in the constraint.

        Example:
            >>> find_category_name_in_restrictions('I1 <= 7')
            ('I1', 0)
            >>> find_category_name_in_restrictions('  I1  <=  7.8 ')
            ('I1', 0)
            >>> find_category_name_in_restrictions('  category name with spaces  <=  7.8 ')
            ('category name with spaces', 0)
            >>> find_category_name_in_restrictions('7.8 >= I2')
            ('I2', 7)
            >>> find_category_name_in_restrictions('7.8 >=       I2')
            ('I2', 13)
            >>> find_category_name_in_restrictions('I1/O2 >= 0.5')
            ('I1/O2', 0)

        Warning:
            It is assumed that given constraint is a valid constraint with
            category name. If invalid value is given, the function will fail or
            return incorrect value

    '''
    split_val = '>=' if '>=' in val else '<='
    elements = val.split(split_val)
    assert len(elements) == 2
    left_side, right_side = elements
    try:
        float(right_side)
    except ValueError:
        # right hand side is not a number, hence it holds the category
        category = right_side.strip()
        # position in val: left hand side, then the operator, then
        # leading spaces of the right hand side
        index = (len(left_side) + len(split_val) +
                 right_side.index(category))
        return category, index
    return left_side.strip(), 0