From 64ac6ada8f9e97a3e56b3db9e038400e66284f46 Mon Sep 17 00:00:00 2001
From: "Konstantin (Tino) Sering"
Date: Tue, 11 Aug 2020 00:03:01 +0200
Subject: [PATCH] Add masking functionality

---
 pyndl/__init__.py                         |   2 +-
 pyndl/error_codes.pxd                     |  14 ++-
 pyndl/ndl.py                              | 123 ++++++++++++++++---
 pyndl/ndl_openmp.pyx                      |  56 ++++++++-
 pyndl/ndl_parallel.pxd                    |   8 +-
 pyndl/ndl_parallel.pyx                    | 141 +++++++++++++++++++++-
 tests/resources/event_file_masking.tab.gz | Bin 0 -> 89 bytes
 tests/test_ndl.py                         |  71 ++++++++---
 8 files changed, 369 insertions(+), 46 deletions(-)
 create mode 100644 tests/resources/event_file_masking.tab.gz

diff --git a/pyndl/__init__.py b/pyndl/__init__.py
index d9dba2e..0948914 100644
--- a/pyndl/__init__.py
+++ b/pyndl/__init__.py
@@ -17,7 +17,7 @@
 __author__ = ('Konstantin Sering, Marc Weitz, '
               'David-Elias Künstle, Lennard Schneider')
 __author_email__ = 'konstantin.sering@uni-tuebingen.de'
-__version__ = '0.7.1'
+__version__ = '0.8.0'
 __license__ = 'MIT'
 __description__ = ('Naive discriminative learning implements learning and '
                    'classification models based on the Rescorla-Wagner '
diff --git a/pyndl/error_codes.pxd b/pyndl/error_codes.pxd
index 41cc294..9d4fb14 100644
--- a/pyndl/error_codes.pxd
+++ b/pyndl/error_codes.pxd
@@ -3,11 +3,13 @@ cdef enum ErrorCode:
     MAGIC_NUMBER_DOES_NOT_MATCH = 1
     VERSION_NUMBER_DOES_NOT_MATCH = 2
     INITIAL_ERROR_CODE = 3
+    START_LARGER_END = 4
 
-ERROR_CODES = """
-    NO_ERROR = 0
-    MAGIC_NUMBER_DOES_NOT_MATCH = 1
-    VERSION_NUMBER_DOES_NOT_MATCH = 2
-    INITIAL_ERROR_CODE = 3
-    """
+#ERROR_CODES = """
+#    NO_ERROR = 0
+#    MAGIC_NUMBER_DOES_NOT_MATCH = 1
+#    VERSION_NUMBER_DOES_NOT_MATCH = 2
+#    INITIAL_ERROR_CODE = 3
+#    START_LARGER_END = 4
+#    """
diff --git a/pyndl/ndl.py b/pyndl/ndl.py
index ab54fad..08fa4b1 100644
--- a/pyndl/ndl.py
+++ b/pyndl/ndl.py
@@ -43,11 +43,43 @@ def events_from_file(event_path):
     return io.events_from_file(event_path)
 
 
+def _create_cue_outcome_map(cues, outcomes, cues_to_mask, old_cues=frozenset(), old_outcomes=frozenset()):
+    """This function returns mask_up_to_excluding together with a cue_map and
+    an outcome_map in which the cues_to_mask occupy the first
+    mask_up_to_excluding indices of both maps."""
+
+    cues = set(cues) | set(old_cues)
+    outcomes = set(outcomes) | set(old_outcomes)
+
+    # cues to mask have to come at the beginning of the cue_map and we need
+    # the same number of indices in the outcome_map
+    if cues_to_mask == 'all':
+        cues_to_mask = set(cues)
+    elif cues_to_mask is None:
+        cues_to_mask = set()
+    mask_up_to_excluding = len(cues_to_mask)  # exclusive upper bound of the masked indices
+    cues_not_to_mask = set(cues) - cues_to_mask
+    outcomes_not_to_mask = set(outcomes) - cues_to_mask
+
+    # fix the order of the sets
+    cues_to_mask = list(cues_to_mask)
+    cues_not_to_mask = list(cues_not_to_mask)
+    outcomes_not_to_mask = list(outcomes_not_to_mask)
+
+    # reassemble cues and outcomes with the specific ordering now
+    cues = cues_to_mask + cues_not_to_mask
+    outcomes = cues_to_mask + outcomes_not_to_mask
+
+    cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues)))
+    outcome_map = OrderedDict(((outcome, ii) for ii, outcome in enumerate(outcomes)))
+
+    return mask_up_to_excluding, cue_map, outcome_map
+
+
 def ndl(events, alpha, betas, lambda_=1.0, *,
         method='openmp', weights=None,
         number_of_threads=8, len_sublists=10, remove_duplicates=None,
         verbose=False, temporary_directory=None,
-        events_per_temporary_file=10000000):
+        events_per_temporary_file=10000000, cues_to_mask=None):
     """
     Calculate the weights for all_outcomes over all events in event_file
     given by the files path.
@@ -85,7 +117,11 @@ def ndl(events, alpha, betas, lambda_=1.0, *,
         if none is provided, the operating system's default will be used
         (/tmp on unix)
     events_per_temporary_file: int
-        Number of events in each temporary binary file. Has to be larger than 1
+        number of events in each temporary binary file. Has to be larger than 1
+    cues_to_mask: set of cues or None or 'all'
+        if None, no masking is applied; otherwise, every cue in the set is
+        masked from itself whenever it also appears as an outcome in the
+        learning events; 'all' indicates that all cues should be masked
 
     Returns
     -------
@@ -111,30 +147,33 @@ def ndl(events, alpha, betas, lambda_=1.0, *,
                                          verbose=verbose)
     cues = list(cues.keys())
     outcomes = list(outcomes.keys())
-    cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues)))
-    outcome_map = OrderedDict(((outcome, ii) for ii, outcome in enumerate(outcomes)))
-
-    all_outcome_indices = [outcome_map[outcome] for outcome in outcomes]
-
-    shape = (len(outcome_map), len(cue_map))
 
     # initialize weights
     if weights is None:
+        mask_up_to_excluding, cue_map, outcome_map = _create_cue_outcome_map(cues, outcomes, cues_to_mask)
+        shape = (len(outcome_map), len(cue_map))
         weights = np.ascontiguousarray(np.zeros(shape, dtype=np.float64, order='C'))
     elif isinstance(weights, xr.DataArray):
         old_cues = weights.coords["cues"].values.tolist()
-        new_cues = list(set(cues) - set(old_cues))
         old_outcomes = weights.coords["outcomes"].values.tolist()
-        new_outcomes = list(set(outcomes) - set(old_outcomes))
+        if cues_to_mask is None:
+            mask_up_to_excluding = 0
+        else:
+            mask_up_to_excluding, cue_map, outcome_map = _create_cue_outcome_map(cues, outcomes,
+                                                                                 cues_to_mask, old_cues, old_outcomes)
+            # TODO: allocate weights and copy them cell-wise from the old
+            # weights to the new weights
+            raise NotImplementedError('continue learning is not yet implemented for masking')
+
+        new_cues = list(set(cues) - set(old_cues))
+        new_outcomes = list(set(outcomes) - set(old_outcomes))
         cues = old_cues + new_cues
         outcomes = old_outcomes + new_outcomes
         cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(cues)))
         outcome_map = OrderedDict(((outcome, ii) for ii, outcome in enumerate(outcomes)))
-        all_outcome_indices = [outcome_map[outcome] for outcome in outcomes]
-
         weights_tmp = np.concatenate((weights.values,
                                       np.zeros((len(new_outcomes), len(old_cues)),
                                                dtype=np.float64, order='C')),
@@ -168,16 +207,25 @@ def ndl(events, alpha, betas, lambda_=1.0, *,
     if verbose:
         print('start learning...')
     # learning
+    all_outcome_indices_masked = list(range(mask_up_to_excluding))
+    all_outcome_indices_normal = list(range(mask_up_to_excluding, len(outcome_map)))
     if method == 'openmp':
         if sys.platform.startswith('darwin'):
             raise NotImplementedError("OpenMP does not work under MacOs yet."
                                       "Use method='threading' instead.")
+        # 1. learn masked indices
+        ndl_openmp.learn_inplace_masked(binary_files, weights, alpha,
+                                        beta1, beta2, lambda_,
+                                        np.array(all_outcome_indices_masked, dtype=np.uint32),
+                                        len_sublists, number_of_threads)
+        # 2. learn normal indices
         ndl_openmp.learn_inplace(binary_files, weights, alpha,
                                  beta1, beta2, lambda_,
-                                 np.array(all_outcome_indices, dtype=np.uint32),
+                                 np.array(all_outcome_indices_normal, dtype=np.uint32),
                                  len_sublists, number_of_threads)
     elif method == 'threading':
-        part_lists = slice_list(all_outcome_indices, len_sublists)
+        # 1. learn all masked indices
+        part_lists = slice_list(all_outcome_indices_masked, len_sublists)
 
         working_queue = Queue(len(part_lists))
         threads = []
@@ -189,7 +237,7 @@ def worker():
                 if working_queue.empty():
                     break
                 data = working_queue.get()
-                ndl_parallel.learn_inplace(binary_files, weights, alpha,
+                ndl_parallel.learn_inplace_masked(binary_files, weights, alpha,
                                            beta1, beta2, lambda_, data)
 
         with queue_lock:
@@ -203,6 +251,36 @@ def worker():
         for thread in threads:
             thread.join()
+
+        # 2. learn all normal (unmasked) indices
+        part_lists = slice_list(all_outcome_indices_normal, len_sublists)
+
+        working_queue = Queue(len(part_lists))
+        threads = []
+        queue_lock = threading.Lock()
+
+        def worker():
+            while True:
+                with queue_lock:
+                    if working_queue.empty():
+                        break
+                    data = working_queue.get()
+                ndl_parallel.learn_inplace(binary_files, weights,
+                                           alpha, beta1, beta2,
+                                           lambda_, data)
+
+        with queue_lock:
+            for partlist in part_lists:
+                working_queue.put(np.array(partlist, dtype=np.uint32))
+
+        for _ in range(number_of_threads):
+            thread = threading.Thread(target=worker)
+            thread.start()
+            threads.append(thread)
+
+        for thread in threads:
+            thread.join()
+
 
     else:
         raise ValueError('method needs to be either "threading" or "openmp"')
 
@@ -220,6 +298,9 @@ def worker():
                            __name__ + "." + ndl.__name__, method=method,
                            attrs=attrs_to_be_updated)
     # post-processing
+    # extract the right ordering from the cue and outcome maps
+    cues = list(cue_map.keys())
+    outcomes = list(outcome_map.keys())
     weights = xr.DataArray(weights, [('outcomes', outcomes), ('cues', cues)],
                            attrs=attrs)
     return weights
@@ -312,7 +393,7 @@ def attrs(self, attrs):
 
 def dict_ndl(events, alphas, betas, lambda_=1.0, *,
              weights=None, inplace=False, remove_duplicates=None,
-             make_data_array=False, verbose=False):
+             make_data_array=False, verbose=False, cues_to_mask=None):
     """
     Calculate the weights for all_outcomes over all events in event_file.
 
@@ -347,6 +428,10 @@ def dict_ndl(events, alphas, betas, lambda_=1.0, *,
         if True makes a xarray.DataArray out of the dict of dicts.
     verbose : bool
         print some output if True.
+    cues_to_mask: set of cues or None or 'all'
+        if None, no masking is applied; otherwise, every cue in the set is
+        masked from itself whenever it also appears as an outcome in the
+        learning events; 'all' indicates that all cues should be masked
 
     Returns
     -------
@@ -370,6 +455,9 @@ def dict_ndl(events, alphas, betas, lambda_=1.0, *,
     if not (remove_duplicates is None or isinstance(remove_duplicates, bool)):
         raise ValueError("remove_duplicates must be None, True or False")
 
+    if cues_to_mask is None:
+        cues_to_mask = set()
+
     wall_time_start = time.perf_counter()
     cpu_time_start = time.process_time()
     if isinstance(events, str):
@@ -434,6 +522,9 @@ def dict_ndl(events, alphas, betas, lambda_=1.0, *,
             else:
                 update = beta2 * (0 - association_strength)
             for cue in cues:
+                if cues_to_mask == 'all' or cue in cues_to_mask:
+                    if cue == outcome:
+                        continue
                 weights[outcome][cue] += alphas[cue] * update
 
     cpu_time_stop = time.process_time()
diff --git a/pyndl/ndl_openmp.pyx b/pyndl/ndl_openmp.pyx
index 6ebd07b..f071dc6 100644
--- a/pyndl/ndl_openmp.pyx
+++ b/pyndl/ndl_openmp.pyx
@@ -5,8 +5,8 @@ ctypedef np.float64_t dtype_t
 cimport cython
 from cython.parallel cimport parallel, prange
 
-from ndl_parallel cimport learn_inplace_ptr
-from error_codes cimport ErrorCode, NO_ERROR, INITIAL_ERROR_CODE, ERROR_CODES
+from ndl_parallel cimport learn_inplace_ptr, learn_inplace_masked_ptr
+from error_codes cimport ErrorCode, NO_ERROR, INITIAL_ERROR_CODE
 
 
 def learn_inplace(binary_file_paths, np.ndarray[dtype_t, ndim=2] weights,
@@ -23,6 +23,8 @@ def learn_inplace(binary_file_paths, np.ndarray[dtype_t, ndim=2] weights,
     cdef unsigned int start_val, end_val, ii, number_parts
     cdef ErrorCode error = INITIAL_ERROR_CODE
+    if length_all_outcomes == 0:
+        return
 
     # cdef String
     # weights muss contigousarray sein und mode=c, siehe:
@@ -48,4 +50,52 @@ def learn_inplace(binary_file_paths, np.ndarray[dtype_t, ndim=2] weights,
                     break
 
     if (error != NO_ERROR):
-        raise IOError(f'binary files does not have proper format, error code {error}\n{ERROR_CODES}')
+        raise IOError(f'binary file does not have the proper format, error code {error}')
+
+
+# The masked version, in which learning is skipped whenever cue and outcome
+# have the same index. The code is copied so that the unmasked case does not
+# pay the penalty for the extra if statement in the innermost loop.
+
+def learn_inplace_masked(binary_file_paths, np.ndarray[dtype_t, ndim=2] weights,
+                         dtype_t alpha, dtype_t beta1,
+                         dtype_t beta2, dtype_t lambda_,
+                         np.ndarray[unsigned int, ndim=1] all_outcomes,
+                         unsigned int chunksize,
+                         unsigned int number_of_threads):
+
+    cdef unsigned int mm = weights.shape[1]  # number of cues == columns
+    cdef unsigned int* all_outcomes_ptr = <unsigned int *> all_outcomes.data
+    cdef unsigned int length_all_outcomes = all_outcomes.shape[0]
+    cdef char* fname
+    cdef unsigned int start_val, end_val, ii, number_parts
+    cdef ErrorCode error = INITIAL_ERROR_CODE
+
+    if length_all_outcomes == 0:
+        return
+
+    # cdef String
+    # weights must be a contiguous array with mode='c', see:
+    #cdef np.ndarray[np.uint32_t, ndim=3, mode = 'c'] np_buff = np.ascontiguousarray(im, dtype = np.uint32)
+    cdef dtype_t* weights_ptr = <dtype_t *> weights.data  # consider whether [][] or ** or [] or *
+
+    for binary_file_path in binary_file_paths:
+        filename_byte_string = binary_file_path.encode("UTF-8")
+        fname = filename_byte_string
+
+        number_parts = math.ceil(length_all_outcomes / chunksize)
+
+        with nogil, parallel(num_threads=number_of_threads):
+            for ii in prange(number_parts, schedule="dynamic", chunksize=1):
+                start_val = ii * chunksize
+                end_val = min(start_val + chunksize, length_all_outcomes)
+                if start_val == length_all_outcomes:
+                    break
+                error = learn_inplace_masked_ptr(fname, weights_ptr, mm, alpha, beta1,
+                                                 beta2, lambda_, all_outcomes_ptr, start_val,
+                                                 end_val)
+                if error != NO_ERROR:
+                    break
+
+    if (error != NO_ERROR):
+        raise IOError(f'binary file does not have the proper format, error code {error}')
diff --git a/pyndl/ndl_parallel.pxd b/pyndl/ndl_parallel.pxd
index 48351b8..5d49273 100644
--- a/pyndl/ndl_parallel.pxd
+++ b/pyndl/ndl_parallel.pxd
@@ -3,6 +3,10 @@ ctypedef np.float64_t dtype_t
 
 from error_codes cimport ErrorCode
 
-cdef ErrorCode learn_inplace_ptr(char*, dtype_t*, unsigned int, dtype_t, dtype_t,
-                                 dtype_t, dtype_t, unsigned int*, unsigned int,
+cdef ErrorCode learn_inplace_ptr(char*, dtype_t*, unsigned int, dtype_t,
+                                 dtype_t, dtype_t, dtype_t, unsigned int*, unsigned int,
                                  unsigned int) nogil
+
+cdef ErrorCode learn_inplace_masked_ptr(char*, dtype_t*, unsigned int, dtype_t,
+                                        dtype_t, dtype_t, dtype_t, unsigned int*, unsigned int,
+                                        unsigned int) nogil
diff --git a/pyndl/ndl_parallel.pyx b/pyndl/ndl_parallel.pyx
index c51ad8f..2d662dc 100644
--- a/pyndl/ndl_parallel.pyx
+++ b/pyndl/ndl_parallel.pyx
@@ -3,7 +3,8 @@ import math
 from libc.stdlib cimport abort, malloc, free
 from libc.stdio cimport fopen, fread, fclose, FILE
 
-from error_codes cimport ErrorCode, NO_ERROR, MAGIC_NUMBER_DOES_NOT_MATCH, VERSION_NUMBER_DOES_NOT_MATCH, INITIAL_ERROR_CODE, ERROR_CODES
+from error_codes cimport ErrorCode, NO_ERROR, MAGIC_NUMBER_DOES_NOT_MATCH, VERSION_NUMBER_DOES_NOT_MATCH, INITIAL_ERROR_CODE, START_LARGER_END
+
 
 cdef unsigned int MAGIC_NUMBER = 14159265
 cdef unsigned int CURRENT_VERSION_WITH_FREQ = 215
@@ -51,6 +52,9 @@ def learn_inplace(binary_file_paths, np.ndarray[dtype_t, ndim=2] weights,
     cdef unsigned int start_val, end_val
     cdef ErrorCode error = INITIAL_ERROR_CODE
 
+    if length_all_outcomes == 0:
+        return NO_ERROR
+
     # cdef String
     # weights muss contigousarray sein und mode=c, siehe:
     #cdef np.ndarray[np.uint32_t, ndim=3, mode = 'c'] np_buff = np.ascontiguousarray(im, dtype = np.uint32)
@@ -68,7 +72,7 @@ def learn_inplace(binary_file_paths, np.ndarray[dtype_t, ndim=2] weights,
                 break
 
     if (error != NO_ERROR):
-        raise IOError(f'binary files does not have proper format, error code {error}\n{ERROR_CODES}')
+        raise IOError(f'binary file does not have the proper format, error code {error}')
 
 
 cdef int is_element_of(unsigned int elem, unsigned int* arr, unsigned int size) nogil:
@@ -88,6 +92,136 @@ cdef ErrorCode learn_inplace_ptr(char* binary_file_path, dtype_t* weights,
                                  unsigned int start,
                                  unsigned int end) nogil:
 
+    if start == end:
+        return NO_ERROR
+    elif start > end:
+        return START_LARGER_END
+
+    cdef unsigned int number_of_events, number_of_cues, number_of_outcomes
+    cdef dtype_t association_strength, update
+    cdef unsigned int magic_number, version, ii, jj, event, appearance
+    cdef unsigned long long index
+    cdef unsigned int* cue_indices
+    cdef unsigned int* outcome_indices
+    cdef unsigned int max_number_of_cues = 1024
+    cdef unsigned int max_number_of_outcomes = 1024
+
+    cdef FILE* binary_file
+    binary_file = fopen(binary_file_path, "rb")
+
+    read_next_int(&magic_number, binary_file)
+    if not magic_number == MAGIC_NUMBER:
+        fclose(binary_file)
+        return MAGIC_NUMBER_DOES_NOT_MATCH
+    read_next_int(&version, binary_file)
+    if version == CURRENT_VERSION:
+        pass
+    else:
+        fclose(binary_file)
+        return VERSION_NUMBER_DOES_NOT_MATCH
+
+    # preallocate memory
+    cue_indices = <unsigned int *> malloc(sizeof(unsigned int) * max_number_of_cues)
+    outcome_indices = <unsigned int *> malloc(sizeof(unsigned int) * max_number_of_outcomes)
+
+    read_next_int(&number_of_events, binary_file)
+
+    for event in range(number_of_events):
+        # cues
+        read_next_int(&number_of_cues, binary_file)
+        if number_of_cues > max_number_of_cues:
+            max_number_of_cues = number_of_cues
+            free(cue_indices)
+            cue_indices = <unsigned int *> malloc(sizeof(unsigned int) * max_number_of_cues)
+        fread(cue_indices, 4, number_of_cues, binary_file)
+
+        # outcomes
+        read_next_int(&number_of_outcomes, binary_file)
+        if number_of_outcomes > max_number_of_outcomes:
+            max_number_of_outcomes = number_of_outcomes
+            free(outcome_indices)
+            outcome_indices = <unsigned int *> malloc(sizeof(unsigned int) * max_number_of_outcomes)
+        fread(outcome_indices, 4, number_of_outcomes, binary_file)
+
+        # learn
+        for ii in range(start, end):
+            association_strength = 0.0
+            for jj in range(number_of_cues):
+                # this overflows:
+                #index = cue_indices[jj] + mm * all_outcome_indices[ii]
+                index = mm  # implicit cast to unsigned long long
+                index *= all_outcome_indices[ii]  # this can't overflow anymore
+                index += cue_indices[jj]  # this can't overflow anymore
+                # worst case: 4294967295 * 4294967295 + 4294967295 == 18446744069414584320 < 18446744073709551615
+                association_strength += weights[index]
+            if is_element_of(all_outcome_indices[ii], outcome_indices, number_of_outcomes):
+                update = beta1 * (lambda_ - association_strength)
+            else:
+                update = beta2 * (0.0 - association_strength)
+            for jj in range(number_of_cues):
+                index = mm  # implicit cast to unsigned long long
+                index *= all_outcome_indices[ii]  # this can't overflow anymore
+                index += cue_indices[jj]  # this can't overflow anymore
+                weights[index] += alpha * update
+
+    fclose(binary_file)
+    free(cue_indices)
+    free(outcome_indices)
+    return NO_ERROR
+
+
+# The masked version, in which learning is skipped whenever cue and outcome
+# have the same index. The code is copied so that the unmasked case does not
+# pay the penalty for the extra if statement in the innermost loop.
+
+def learn_inplace_masked(binary_file_paths, np.ndarray[dtype_t, ndim=2] weights,
+                         dtype_t alpha, dtype_t beta1,
+                         dtype_t beta2, dtype_t lambda_,
+                         np.ndarray[unsigned int, ndim=1] all_outcomes):
+
+    cdef unsigned int mm = weights.shape[1]  # number of cues == columns
+    cdef unsigned int* all_outcomes_ptr = <unsigned int *> all_outcomes.data
+    cdef unsigned int length_all_outcomes = all_outcomes.shape[0]
+    cdef char* fname
+    cdef unsigned int start_val, end_val
+    cdef ErrorCode error = INITIAL_ERROR_CODE
+
+    if length_all_outcomes == 0:
+        return NO_ERROR
+
+    # cdef String
+    # weights must be a contiguous array with mode='c', see:
+    #cdef np.ndarray[np.uint32_t, ndim=3, mode = 'c'] np_buff = np.ascontiguousarray(im, dtype = np.uint32)
+    cdef dtype_t* weights_ptr = <dtype_t *> weights.data  # consider whether [][] or ** or [] or *
+
+    for binary_file_path in binary_file_paths:
+        filename_byte_string = binary_file_path.encode("UTF-8")
+        fname = filename_byte_string
+
+        with nogil:
+            error = learn_inplace_masked_ptr(fname, weights_ptr, mm, alpha, beta1,
+                                             beta2, lambda_, all_outcomes_ptr, 0,
+                                             length_all_outcomes)
+            if error != NO_ERROR:
+                break
+
+    if (error != NO_ERROR):
+        raise IOError(f'binary file does not have the proper format, error code {error}')
+
+
+# possibly return an exception here instead
+cdef ErrorCode learn_inplace_masked_ptr(char* binary_file_path, dtype_t* weights,
+                                        unsigned int mm,
+                                        dtype_t alpha, dtype_t beta1,
+                                        dtype_t beta2, dtype_t lambda_,
+                                        unsigned int* all_outcome_indices,
+                                        unsigned int start,
+                                        unsigned int end) nogil:
+
+    if start == end:
+        return NO_ERROR
+    elif start > end:
+        return START_LARGER_END
 
     cdef unsigned int number_of_events, number_of_cues, number_of_outcomes
     cdef dtype_t association_strength, update
@@ -151,6 +285,9 @@ cdef ErrorCode learn_inplace_ptr(char* binary_file_path, dtype_t* weights,
             else:
                 update = beta2 * (0.0 - association_strength)
             for jj in range(number_of_cues):
+                # check for masking:
+                if all_outcome_indices[ii] == cue_indices[jj]:
+                    continue
                 index = mm  # implicit cast to unsigned long long
                 index *= all_outcome_indices[ii]  # this can't overflow anymore
                 index += cue_indices[jj]  # this can't overflow anymore
diff --git a/tests/resources/event_file_masking.tab.gz b/tests/resources/event_file_masking.tab.gz
new file mode 100644
index 0000000000000000000000000000000000000000..ddea1361c6471df0c46ceaf3658d1587b30cf70b
GIT binary patch
literal 89
zcmb2|=HMt3H;iXsPAyB#D~V6b%t?*UO)SpN%uClRNlaoW*7MbU_C;UU^W1qL!>h-?
s%l{e6WCgSIq8cUf8Rw@S;S}liXY5*bY`J47!_gfKYS|^f9T*rG0H_rqNB{r;

literal 0
HcmV?d00001

diff --git a/tests/test_ndl.py b/tests/test_ndl.py
index bff7e96..1abdde8 100644
--- a/tests/test_ndl.py
+++ b/tests/test_ndl.py
@@ -19,6 +19,7 @@
 TEST_ROOT = os.path.join(os.path.pardir, os.path.dirname(__file__))
 FILE_PATH_SIMPLE = os.path.join(TEST_ROOT, "resources/event_file_simple.tab.gz")
 FILE_PATH_MULTIPLE_CUES = os.path.join(TEST_ROOT, "resources/event_file_multiple_cues.tab.gz")
+FILE_PATH_MASKING = os.path.join(TEST_ROOT, "resources/event_file_masking.tab.gz")
 REFERENCE_PATH = os.path.join(TEST_ROOT, 'reference/weights_event_file_simple.csv')
 REFERENCE_PATH_NDL2 = os.path.join(TEST_ROOT, 'reference/weights_event_file_simple_ndl2.csv')
 REFERENCE_PATH_MULTIPLE_CUES_NDL2 = os.path.join(TEST_ROOT, 'reference/weights_event_file_multiple_cues_ndl2.csv')
@@ -57,6 +58,36 @@ def result_dict_ndl_data_array():
     return ndl.dict_ndl(FILE_PATH_SIMPLE, ALPHA, BETAS, make_data_array=True)
 
 
+@pytest.fixture(scope='module')
+def result_dict_ndl_mask_all():
+    return ndl.dict_ndl(FILE_PATH_MASKING, ALPHA, BETAS, cues_to_mask='all')
+
+
+@pytest.fixture(scope='module')
+def result_dict_ndl_mask_ab():
+    return ndl.dict_ndl(FILE_PATH_MASKING, ALPHA, BETAS, cues_to_mask={'a', 'b'})
+
+
+@pytest.fixture(scope='module')
+def result_ndl_threading_mask_all():
+    return ndl.ndl(FILE_PATH_MASKING, ALPHA, BETAS, method='threading', cues_to_mask='all')
+
+
+@pytest.fixture(scope='module')
+def result_ndl_threading_mask_ab():
+    return ndl.ndl(FILE_PATH_MASKING, ALPHA, BETAS, method='threading', cues_to_mask={'a', 'b'})
+
+
+@pytest.fixture(scope='module')
+def result_ndl_openmp_mask_all():
+    return ndl.ndl(FILE_PATH_MASKING, ALPHA, BETAS, method='openmp', cues_to_mask='all')
+
+
+@pytest.fixture(scope='module')
+def result_ndl_openmp_mask_ab():
+    return ndl.ndl(FILE_PATH_MASKING, ALPHA, BETAS, method='openmp', cues_to_mask={'a', 'b'})
+
+
 @pytest.fixture(scope='module')
 def result_continue_learning():
     events_simple = pd.read_csv(FILE_PATH_SIMPLE, sep="\t")
@@ -177,6 +208,29 @@ def test_continue_learning_dict_ndl_data_array(result_dict_ndl, result_dict_ndl_
     assert len(unequal) == 0  # pylint: disable=len-as-condition
 
 
+def test_masking_all(result_dict_ndl_mask_all, result_ndl_threading_mask_all, result_ndl_openmp_mask_all):
+    unequal, unequal_ratio = compare_arrays(FILE_PATH_MASKING, result_dict_ndl_mask_all,
+                                            result_ndl_threading_mask_all)
+    print('%.2f ratio unequal' % unequal_ratio)
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
+    unequal, unequal_ratio = compare_arrays(FILE_PATH_MASKING, result_dict_ndl_mask_all,
+                                            result_ndl_openmp_mask_all)
+    print('%.2f ratio unequal' % unequal_ratio)
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
+
+
+def test_masking_ab(result_dict_ndl_mask_ab, result_ndl_threading_mask_ab, result_ndl_openmp_mask_ab):
+    unequal, unequal_ratio = compare_arrays(FILE_PATH_MASKING, result_dict_ndl_mask_ab,
+                                            result_ndl_threading_mask_ab)
+    print('%.2f ratio unequal' % unequal_ratio)
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
+    unequal, unequal_ratio = compare_arrays(FILE_PATH_MASKING, result_dict_ndl_mask_ab,
+                                            result_ndl_openmp_mask_ab)
+    print('%.2f ratio unequal' % unequal_ratio)
+    assert len(unequal) == 0  # pylint: disable=len-as-condition
+
+
 @pytest.mark.nolinux
 def test_continue_learning(result_continue_learning, result_ndl_openmp):
     assert result_continue_learning.shape == result_ndl_openmp.shape
@@ -450,7 +504,6 @@ def clock(func, args, **kwargs):
 def compare_arrays(file_path, arr1, arr2):
     _, cues, outcomes = count.cues_outcomes(file_path)
-    cue_map, outcome_map, _ = generate_mapping(file_path)
 
     unequal = list()
 
@@ -458,11 +511,7 @@ def compare_arrays(file_path, arr1, arr2):
     for cue in cues:
         values = list()
         for array in (arr1, arr2):
-            if isinstance(array, np.ndarray):
-                outcome_index = outcome_map[outcome]
-                cue_index = cue_map[cue]
-                values.append(array[outcome_index][cue_index])
-            elif isinstance(array, xr.DataArray):
+            if isinstance(array, xr.DataArray):
                 values.append(array.loc[{'outcomes': outcome, 'cues': cue}].values)
             elif isinstance(array, pd.DataFrame):
                 values.append(array.loc[outcome][cue])
@@ -475,13 +524,3 @@ def compare_arrays(file_path, arr1, arr2):
 
     unequal_ratio = len(unequal) / (len(outcomes) * len(cues))
     return (unequal, unequal_ratio)
-
-
-def generate_mapping(event_path):
-    _, cues, outcomes = count.cues_outcomes(event_path)
-    all_cues = list(cues.keys())
-    all_outcomes = list(outcomes.keys())
-    cue_map = OrderedDict(((cue, ii) for ii, cue in enumerate(all_cues)))
-    outcome_map = OrderedDict(((outcome, ii) for ii, outcome in enumerate(all_outcomes)))
-
-    return (cue_map, outcome_map, all_outcomes)
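
--
For illustration, a minimal sketch of how the new cues_to_mask argument is
meant to be called from Python. The event file path and the alpha/beta
values below are assumptions made for the example; only the cues_to_mask
values mirror the test fixtures above.

    from pyndl import ndl

    # assumed parameters; any event file in which cues also occur as
    # outcomes (such as the masking test resource added above) will do
    events = 'tests/resources/event_file_masking.tab.gz'
    alpha = 0.1
    betas = (0.1, 0.1)

    # mask every cue from itself: the weight from a cue onto the identical
    # outcome is never updated and therefore stays at its initial 0.0
    weights_all = ndl.ndl(events, alpha, betas, method='threading',
                          cues_to_mask='all')

    # mask only the cues 'a' and 'b' from themselves
    weights_ab = ndl.dict_ndl(events, alpha, betas, cues_to_mask={'a', 'b'})

Because ndl() returns an xarray.DataArray, the effect of the masking can be
checked directly: a self-association such as
weights_all.loc[{'outcomes': 'a', 'cues': 'a'}] stays at zero, while all
unmasked cells receive the usual Rescorla-Wagner updates.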