Module integer_sequences.generator

Expand source code
from generator.XESTransformator import XESTransformator
from generator.SequenceGenerator import SequenceGenerator
from inspect import getmembers

__all__ = ["SequenceGenerator", "XESTransformator"]

# Tell pdoc to document private members as well: every member whose name
# starts with an underscore is opted in, except names that also end with an
# underscore (dunder members such as __class__).
__pdoc__ = {}
for cls in (XESTransformator, SequenceGenerator):
    for name, value in getmembers(cls):
        if not name.startswith("_") or name.endswith("_"):
            continue
        __pdoc__[f"{cls.__name__}.{name}"] = True

Classes

class SequenceGenerator (wanted_length: int = 10)

Generates a log of traces that originate from the same generating distribution / method.

Attributes

length
The amount of items to generate.
config
A config object that holds, per implemented method, a list of required parameters and a reference to the method.

Initializes the SequenceGenerator class.

Parameters

wanted_length: The desired amount of items to generate. 10 by default.

Expand source code
class SequenceGenerator:
    """
    Generates a log of traces that originate from the same generating distribution / method.

    Attributes:
      length: The amount of items to generate.
      config: A config object that holds, per implemented metod,
                a list of required parameters and a reference to the method.
    """

    # Class Methods
    def __init__(self, wanted_length: int = 10):
        """
        Initializes the SequenceGenerator class.

        Parameters:
          wanted_length: The desired amount of items to generate.
                           10 by default.
        """
        # Test for invalid lengths
        if wanted_length <= 0:
            raise InvalidLengthException(wanted_length)

        # At this point we know that length is at least 1.
        self.length: int = wanted_length
        """The amount of items to generate."""
        self.config: Dict[str, Dict[str, Any]] = {
            # fib = short term, multiple dependencies
            "fib": {"parameters": ["first", "second"], "method": self.__fib_wrapper},
            "pascal": {"parameters": ["first"], "method": self.__pascal_wrapper},
            "recaman": {"parameters": ["first"], "method": self.__recaman_wrapper},
            "catalan": {"parameters": ["first"], "method": self.__catalan_wrapper},
            "range_up": {
                "parameters": ["first", "step"],
                "method": self.__range_up_wrapper,
            },
            "range_down": {
                "parameters": ["last", "step"],
                "method": self.__range_down_wrapper,
            },
            # long term, multiple dependency
            "long_term_dependency": {
                "parameters": ["first", "second", "third", "fourth", "fifth"],
                "method": self.__long_term_dependency_wrapper,
            },
            # long term, singular dependency
            "long_term_single_dependency": {
                "parameters": [
                    "first",
                    "second",
                    "third",
                    "fourth",
                    "fifth",
                    "constant",
                ],
                "method": self.__long_term_single_dependency_wrapper,
            },
            # short term, singular dependency
            "short_term_single_dependency": {
                "parameters": ["first", "constant"],
                "method": self.__short_term_single_dependency_wrapper,
            },
        }
        """A config object that holds, per implemented metod,
        a list of required parameters and a reference to the method."""

    def __repr__(self) -> str:
        return f"SequenceGenerator(length={self.length}, implemented_generators={self.get_generators()})"

    # Generator methods
    def __fib(self, first: int = 1, second: int = 1) -> Generator:
        """
        Yield the first `self.length` numbers of the Fibonnaci sequence
        where the first term is `first` and the second term is `second`.

        Parameters:
          first: The first element of the sequence.
          second: The second element of the sequence.

        Returns a generator that generates the sequence.
        """
        yield first
        # If you only want 1 number, for some reason?
        if self.length == 1:
            return

        yield second

        count = 0
        while count < self.length - 2:
            # Increment count
            count += 1
            # Compute next value
            current = first + second
            yield current
            # Update values
            first = second
            second = current

    def __fib_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.fib`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__fib(first=params["first"], second=params["second"])

    def __pascal(self, first: int = 1) -> Generator:
        """
        Yield the first `self.length` numbers of the sequence defined by
        reading the pascal triangle from left to right, top to bottom,
        where the first integer is `first` (usually this is 1).

        Parameters:
          first: The first integer on top of the triangle,
                   and consequently the first integer in the sequence.

        Returns a generator that generates the sequence.
        """
        yield first

        # If for some reason you only want the first number?
        if self.length == 1:
            return

        def next_row(row: List[int]) -> List[int]:
            """
            Computes the next row in the triangle of pascal.

            Parameters:
              row: The current row.

            Returns a the next row.
            """
            lst = []
            tmp = 0
            for val in row:
                lst.append(tmp + val)
                tmp = val
            lst.append(first)
            return lst

        row = [first]

        # Keep track of counts, start at 1
        counts = 1

        while True:
            # compute the next row
            next = next_row(row)

            # loop through them
            for item in next:
                # test for length
                if counts < self.length:
                    yield item
                    counts += 1
                else:
                    return

            row = next

    def __pascal_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.pascal`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__pascal(first=params["first"])

    def __recaman(self, first: int = 0) -> Generator:
        """
        Generator for the Recaman's sequence, a well known sequence
        from the on-line encyclopedia of integer sequences.
        Available [here](https://oeis.org/A005132).

        Parameters:
          first: The first element of the sequence.
                   The original sequence defines this as 0.

        Returns a generator that generates the sequence.
        """
        # If for some reason you only want the first number?
        if self.length == 1:
            yield first
            return

        # Keep track of a count and already seen digits
        count = 0
        current = first
        already_seen = set([])

        def get_next(current: int, index: int) -> int:
            """
            Computes the next value in the sequence.

            Parameters:
              current: The current value.
              index: The index of the current value.

            Returns the new value.
            """
            # Compute a(n) = a(n-1) - n
            # if nonnegative and not in sequence, return
            new_number = current - index

            if (new_number < 0) | (new_number in already_seen):
                # Negative or already seen: add index in stead
                new_number = current + index

            return new_number

        while count < self.length:
            # Compute next term in the sequence
            new = get_next(current, count)

            # Save it to the set of already seen terms
            already_seen.add(new)

            # Set current to new value and increase count
            count += 1
            current = new

            # Finally, yield the correct value
            yield current

    def __recaman_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.recaman`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__recaman(first=params["first"])

    def __catalan(self, first: int = 1) -> Generator:
        """
        Generates the Catalan numbers, where the first integer is parametrised.
        The catalan sequence is available [here](https://oeis.org/A000108).

        Implemented using dynamic programming as the direct formula has issues with `n > 30`
        In particular, 14544636039226909 became 14544636039226908 and all subsequent values were off.

        Parameters:
          first: The first element of the sequence.
                   The original sequence defines this as 1.

        Returns a generator that generates the sequence.
        """
        # Initialise dynamic programming table
        dp = [0] * (self.length + 1)
        dp[0] = first  # in our case this is a parameter. By default it should be 1.
        dp[1] = first  # in our case this is a parameter. By default it should be 1.

        # Fill the dp entries based on the recursive formula
        for i in range(2, self.length + 1):
            for j in range(i):
                dp[i] += dp[j] * dp[i - j - 1]

        # Loop through table, and yield the next item as long as the index does not exceed the length!
        for index in range(len(dp)):
            if index < self.length:
                yield dp[index]

    def __catalan_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.recaman`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__catalan(first=params["first"])

    def __range_up(self, first: int = 0, step: int = 1) -> Generator:
        """
        Simple range generator that counts up.

        Parameters:
          first: The first element of the sequence.
          step: The stepsize passed to range

        Returns a generator that generates the sequence.
        """

        # Use our own generator
        def range_generator(first: int, step: int) -> Generator:
            """
            Internal range generator that counts up.
            """
            i = first
            while True:
                yield i
                i += step

        count = 0
        for i in range_generator(first, step):
            if count < self.length:
                yield i
                count += 1
            else:
                break

    def __range_up_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.range_up`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__range_up(first=params["first"], step=params["step"])

    def __range_down(self, last: int = 0, step: int = 1) -> Generator:
        """
        Simple range generator that counts down with stepsize `step`
        such that the last element will be `last`.

        Parameters:
          last: The last element of the sequence.
          step: The stepsize passed to range

        Returns a generator that generates the sequence.
        """
        # Compute highest possible value such that we do not go negative
        first = last + step * self.length - 1

        # Use our own generator
        def range_generator(first: int, step: int) -> Generator:
            """
            Internal range generator that counts down.
            """
            i = first
            while True:
                yield i
                i -= step

        count = 0
        for i in range_generator(first, step):
            if count < self.length:
                yield i
                count += 1
            else:
                break

    def __range_down_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.range_down`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__range_down(last=params["last"], step=params["step"])

    def __long_term_dependency(
        self,
        first: int = 0,
        second: int = 0,
        third: int = 0,
        fourth: int = 0,
        fifth: int = 0,
    ) -> Generator:
        """
        Generalization of Fibonacci sequence with increased dependency, where F(n) = F(n-1) + F(n-5).
        In other words, the 6th term is equal to the sum of the 5th and the 1st.
        """
        count = 0

        # Basecases
        should_check = [first, second, third, fourth, fifth]
        for num in should_check:
            if count < self.length:
                yield num
                count += 1
            else:
                return

        # Assume we want the next element in general case

        # Need to keep points to 5 previous elements
        n_minus_1 = fifth
        n_minus_2 = fourth
        n_minus_3 = third
        n_minus_4 = second
        n_minus_5 = first

        while count < self.length:
            # Increment count
            count += 1

            # Compute next number and yield it
            n = n_minus_5 + n_minus_1
            yield n

            # Update values
            n_minus_1, n_minus_2, n_minus_3, n_minus_4, n_minus_5 = (
                n,
                n_minus_1,
                n_minus_2,
                n_minus_3,
                n_minus_4,
            )

    def __long_term_dependency_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.long_term_dependency`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__long_term_dependency(
            first=params["first"],
            second=params["second"],
            third=params["third"],
            fourth=params["fourth"],
            fifth=params["fifth"],
        )

    def __long_term_single_dependency(
        self,
        first: int = 0,
        second: int = 0,
        third: int = 0,
        fourth: int = 0,
        fifth: int = 0,
        constant: int = 1,
    ) -> Generator:
        """
        F(n) = F(n-5) * c.
        """
        count = 0

        # Basecases
        should_check = [first, second, third, fourth, fifth]
        for num in should_check:
            if count < self.length:
                yield num
                count += 1
            else:
                return

        # Assume we want the next element in general case

        # Need to keep points to 5 previous elements
        n_minus_1 = fifth
        n_minus_2 = fourth
        n_minus_3 = third
        n_minus_4 = second
        n_minus_5 = first

        while count < self.length:
            # Increment count
            count += 1

            # Compute next number and yield it
            n = n_minus_5 * constant
            yield n

            # Update values
            n_minus_1, n_minus_2, n_minus_3, n_minus_4, n_minus_5 = (
                n,
                n_minus_1,
                n_minus_2,
                n_minus_3,
                n_minus_4,
            )

    def __long_term_single_dependency_wrapper(
        self, params: Dict[str, int]
    ) -> Generator:
        """
        Wrapper method for `self.__long_term_single_dependency`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__long_term_single_dependency(
            first=params["first"],
            second=params["second"],
            third=params["third"],
            fourth=params["fourth"],
            fifth=params["fifth"],
            constant=params["constant"],
        )

    def __short_term_single_dependency(
        self, first: int = 1, constant: int = 2
    ) -> Generator:
        """
        A short term dependency. F(n) = F(n-1) * c
        """
        yield first
        # If you only want 1 number, for some reason?
        if self.length == 1:
            return

        count = 0
        while count < self.length - 1:
            # Increment count
            count += 1
            # Compute next value
            current = first * constant
            yield current
            # Update values
            first = current

    def __short_term_single_dependency_wrapper(
        self, params: Dict[str, int]
    ) -> Generator:
        """
        Wrapper method for `self.range_down`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
        """
        return self.__short_term_single_dependency(
            first=params["first"], constant=params["constant"]
        )

    # Private getters
    def __get_params(self, seq_name: str) -> List[str]:
        """
        Gets config parameters for a particul sequence generator.

        Parameters:
          seq_name: The name of the sequence generation method for which to retrieve parameters.

        Returns a list of parameters.
        """
        params = [str(item) for item in self.config[seq_name]["parameters"]]
        return params

    def __get_method(self, seq_name: str) -> Callable[[Dict[str, int]], Generator]:
        """
        Gets the method reference for a particular sequence generator.

        Parameters:
          seq_name: The name of the sequence generation method for which to retrieve a method reference.

        Returns a method reference.
        """
        return self.config[seq_name]["method"]

    # Helper methods
    def __check_params(self, given: Dict[str, Any], required: List[str]) -> None:
        """
        Checks correctness of supplied parameters to `self.generate_trace` or `self.generate_log`.

        Parameters:
          given: The given dictionary.
          required : The required items.

        Raises a ``MissingRequiredParameter`` when something that was required wasn't there.
        """
        missing = [param for param in required if param not in given]
        if missing:
            raise MissingRequiredParameter(missing)

    def __build_params(
        self, given: Dict[str, Any], required: List[Any]
    ) -> Dict[str, Any]:
        """
        Builds a keyword-argument dictionary given the parameters in `self.generate_trace` or `self.generate_log`.

        Parameters:
          given: The given dictionary.
          required : The required items.

        Returns a dictionary of the form:
        ```
        {
            "param1" : value,
            "param2" : value,
            ...
        }
        ```
        """
        return {
            required_parameter: given[required_parameter]
            for required_parameter in required
        }

    def __build_param_matrix(
        self, givens: Dict[str, Any], requireds: List[str]
    ) -> List[Dict[str, int]]:
        """
        Builds parameter list for usage in `self.generate_log`.

        Transforms a dictionary of shape:
        ```
        {
            "required_param1" : [1, 2, ..],
            "required_param2" : [1, 2, 3, 4, ..],
            "required_param3" : [1, ..],
            ..
        }
        ```
        to our wanted list of shape:
        ```
        [
            {
                "required_param1" : 1,
                "required_param2" : 1,
                "required_param3" : 1
            },
            ...
            {
                "required_param1" : 3,
                "required_param2" : 3,
                "required_param3" : 3
            }
        ]
        ```

        Parameters:
          given: The given dictionary.
          required : The required items.

        Returns a list of dictionaries as listed above.
        """
        # Keep result variable
        result = []

        # Create dictionary out of lists
        array_dict = self.__build_params(givens, requireds)

        # Compute the cartesian product using itertools
        compute_product_of_me = list(array_dict.values())
        for tuple_of_vals in itertools.product(*compute_product_of_me):
            # tuple_of_vals: (a, b, c, ...)
            # len(item) == len(keys)
            # item[0] corresponds with keys[0]
            keys = [key[:-1] for key in array_dict.keys()]
            result.append({key: value for (key, value) in zip(keys, tuple_of_vals)})

        return result

    def __check_length_with_params(self, seq_name: str) -> None:
        """
        Checks whether or not we can mathematically generate a trace of length `self.length`
        given a particular generator, identified by `seq_name`.

        Parameters:
          seq_name: The name of the sequence generation method for which to perform this check.

        Raises an `InvalidLengthException` when a sequence cannot be generated
        due to mismatch of required params and wanted length.
        """
        min_len_for_method = len(self.__get_params(seq_name))
        if min_len_for_method > self.length:
            raise InvalidLengthException(
                length=min_len_for_method,
                message=f"Cannot generate sequence of length {self.length}\
                    if a method needs a minimum of %s parameters",
            )

    # Public methods
    def get_generators(self) -> List[str]:
        """
        Gets implemented generator functions.

        Returns a list of the names of implemented generator functions.
        """
        return [generator for generator in self.config.keys()]

    def generate_trace(self, seq_name: str, **kwargs: Any) -> Generator:
        """
        Generates a single trace corresponding to some sequence.

        The name is stripped of surrounding whitespace and lower-cased before
        it is validated and looked up.

        Parameters:
          seq_name: The name of the sequence generation method for which to generate a trace.
          kwargs: The parameters of that method, passed by name. Parameters that
                    the method does not require are ignored.

        Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator method.
        Raises a `MissingRequiredParameter` when a particular parameter was not provided.

        Returns a generator for a particular sequence.
        """
        # Normalise once, so that the key that is validated is also the key
        # that is used for every lookup below.
        key = seq_name.strip().lower()

        try:
            check_item_list(key, self.get_generators())
        except MissingItem:
            # Report the name exactly as the caller wrote it.
            raise NotYetImplemented(seq_name)

        # It exists, check for param mismatch
        required_params = self.__get_params(key)
        self.__check_params(kwargs, required_params)

        # required (and possibly more) params present -- retrieve reference to generator
        method = self.__get_method(key)

        # Pass through only what the generator requires.
        return method(self.__build_params(kwargs, required_params))

    def generate_log(self, seq_name: str, **kwargs: Any) -> List[Tuple[int, ...]]:
        """
        Generates an entire log corresponding to some sequence.

        The name is stripped of surrounding whitespace and lower-cased before
        it is validated and looked up.

        Parameters:
          seq_name: The name of the sequence generation method for which to generate a log.
          kwargs: For every parameter of that method, its name with an `s` appended,
                    mapped to the values to try (e.g. `firsts=[1, 2]`).
                    One trace is generated per combination of values.

        Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator method.
        Raises a `MissingRequiredParameter` when a particular parameter was not provided.
        Raises an `InvalidLengthException` when the method requires more parameters than `self.length`.

        Returns a log of traces as a list of tuples.
        """
        # Normalise once, so that the key that is validated is also the key
        # that is used for every lookup below.
        key = seq_name.strip().lower()

        try:
            check_item_list(key, self.get_generators())
        except MissingItem:
            # Report the name exactly as the caller wrote it.
            raise NotYetImplemented(seq_name)

        # A log takes the plural form of every generator parameter.
        required_params = [param + "s" for param in self.__get_params(key)]
        self.__check_params(kwargs, required_params)
        self.__check_length_with_params(key)

        # One trace per parameter combination, kept in generation order.
        return [
            tuple(self.generate_trace(key, **params))
            for params in self.__build_param_matrix(kwargs, required_params)
        ]

Instance variables

var config

A config object that holds, per implemented method, a list of required parameters and a reference to the method.

var length

The amount of items to generate.

Methods

def _SequenceGenerator__build_param_matrix(self, givens: Dict[str, Any], requireds: List[str]) ‑> List[Dict[str, int]]

Builds parameter list for usage in self.generate_log.

Transforms a dictionary of shape:

{
    "required_param1" : [1, 2, ..],
    "required_param2" : [1, 2, 3, 4, ..],
    "required_param3" : [1, ..],
    ..
}

to our wanted list of shape:

[
    {
        "required_param1" : 1,
        "required_param2" : 1,
        "required_param3" : 1
    },
    ...
    {
        "required_param1" : 3,
        "required_param2" : 3,
        "required_param3" : 3
    }
]

Parameters

given: The given dictionary. required : The required items.

Returns a list of dictionaries as listed above.

Expand source code
def __build_param_matrix(
    self, givens: Dict[str, Any], requireds: List[str]
) -> List[Dict[str, int]]:
    """
    Builds the per-trace parameter dictionaries for usage in `self.generate_log`.

    Takes a dictionary that maps every required key to its candidate values:
    ```
    {
        "firsts" : [1, 2],
        "seconds" : [3, 4]
    }
    ```
    and returns one dictionary per element of the cartesian product of
    those value lists, with the last character of every key dropped:
    ```
    [
        {"first" : 1, "second" : 3},
        {"first" : 1, "second" : 4},
        {"first" : 2, "second" : 3},
        {"first" : 2, "second" : 4}
    ]
    ```

    Parameters:
      givens: The given dictionary; every key in `requireds` must map to an iterable of values.
      requireds: The keys to take from `givens`, in the order in which they are combined.

    Returns a list of dictionaries as listed above.
    """
    # Keep only the required entries.
    value_lists = self.__build_params(givens, requireds)

    # The keys are the same for every combination, so derive them once.
    # Dropping the last character turns e.g. "firsts" into "first".
    keys = [key[:-1] for key in value_lists]

    # Each combination is a tuple whose positions line up with `keys`.
    return [
        dict(zip(keys, combination))
        for combination in itertools.product(*value_lists.values())
    ]
def _SequenceGenerator__build_params(self, given: Dict[str, Any], required: List[Any]) ‑> Dict[str, Any]

Builds a keyword-argument dictionary given the parameters in self.generate_trace or self.generate_log.

Parameters

given: The given dictionary. required : The required items.

Returns a dictionary of the form:

{
    "param1" : value,
    "param2" : value,
    ...
}
Expand source code
def __build_params(
    self, given: Dict[str, Any], required: List[Any]
) -> Dict[str, Any]:
    """
    Picks the required entries out of the parameters that were passed to
    `self.generate_trace` or `self.generate_log`.

    Parameters:
      given: The given dictionary.
      required: The required items.

    Returns a dictionary of the form:
    ```
    {
        "param1" : value,
        "param2" : value,
        ...
    }
    ```
    """
    selected = {}
    for name in required:
        # Anything in `given` that is not required is left out.
        selected[name] = given[name]
    return selected
def _SequenceGenerator__catalan(self, first: int = 1) ‑> Generator

Generates the Catalan numbers, where the first integer is parametrised. The catalan sequence is available here.

Implemented using dynamic programming, as the direct formula has issues with n > 30. In particular, 14544636039226909 became 14544636039226908 and all subsequent values were off.

Parameters

first: The first element of the sequence. The original sequence defines this as 1.

Returns a generator that generates the sequence.

Expand source code
def __catalan(self, first: int = 1) -> Generator:
    """
    Generates the Catalan numbers, where the first integer is parametrised.
    The catalan sequence is available [here](https://oeis.org/A000108).

    Implemented using dynamic programming as the direct formula has issues with `n > 30`.
    In particular, 14544636039226909 became 14544636039226908 and all subsequent values were off.

    Exactly `self.length` terms are computed, each one right before it is yielded.

    Parameters:
      first: The first element of the sequence.
               The original sequence defines this as 1.

    Returns a generator that generates the sequence.
    """
    terms: List[int] = []
    for i in range(self.length):
        if i < 2:
            # Both base cases are seeded with the parameter (1 by default).
            term = first
        else:
            # Recursive formula: C(i) = sum of C(j) * C(i - 1 - j) for j in 0..i-1.
            term = sum(terms[j] * terms[i - 1 - j] for j in range(i))
        terms.append(term)
        yield term
def _SequenceGenerator__catalan_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.__catalan. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __catalan_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that feeds a parameter dictionary to `self.__catalan`.
    Every generator gets such an adapter so that a trace can be produced
    through one uniform call, given only a sequence key.

    Parameters:
      params: Must contain the key `"first"`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`:
    the key is read without any check, so a missing key raises a `KeyError`.
    """
    seed = params["first"]
    return self.__catalan(first=seed)
def _SequenceGenerator__check_length_with_params(self, seq_name: str) ‑> NoneType

Checks whether or not we can mathematically generate a trace of length self.length given a particular generator, identified by seq_name.

Parameters

seq_name: The name of the sequence generation method for which to perform this check.

Raises an InvalidLengthException when a sequence cannot be generated due to mismatch of required params and wanted length.

Expand source code
def __check_length_with_params(self, seq_name: str) -> None:
    """
    Checks whether or not we can mathematically generate a trace of length `self.length`
    given a particular generator, identified by `seq_name`.

    Parameters:
      seq_name: The name of the sequence generation method for which to perform this check.

    Raises an `InvalidLengthException` when the generator requires more
    parameters than the wanted length.
    """
    required_count = len(self.__get_params(seq_name))
    if required_count > self.length:
        # Adjacent string literals are used instead of a backslash
        # continuation, which would embed the source indentation in the
        # message.
        # NOTE(review): `%s` is presumably filled in with `length` by
        # InvalidLengthException -- confirm against the exception class.
        raise InvalidLengthException(
            length=required_count,
            message=(
                f"Cannot generate sequence of length {self.length} "
                "if a method needs a minimum of %s parameters"
            ),
        )
def _SequenceGenerator__check_params(self, given: Dict[str, Any], required: List[str]) ‑> NoneType

Checks correctness of supplied parameters to self.generate_trace or self.generate_log.

Parameters

given: The given dictionary. required : The required items.

Raises a MissingRequiredParameter when something that was required wasn't there.

Expand source code
def __check_params(self, given: Dict[str, Any], required: List[str]) -> None:
    """
    Verifies that every required parameter name is present in the supplied dictionary.

    Parameters:
      given: The dictionary of supplied parameters.
      required: The names that must be present as keys of `given`.

    Raises a ``MissingRequiredParameter`` carrying the list of absent names
    (in the order they appear in `required`) when at least one is missing.
    """
    absent: List[str] = []
    for name in required:
        if name in given:
            continue
        absent.append(name)

    # Nothing missing: the check passes silently.
    if not absent:
        return

    raise MissingRequiredParameter(absent)
def _SequenceGenerator__fib(self, first: int = 1, second: int = 1) ‑> Generator

Yield the first self.length numbers of the Fibonacci sequence where the first term is first and the second term is second.

Parameters

first: The first element of the sequence. second: The second element of the sequence.

Returns a generator that generates the sequence.

Expand source code
def __fib(self, first: int = 1, second: int = 1) -> Generator:
    """
    Yield the first `self.length` numbers of the Fibonacci sequence
    seeded with `first` and `second`.

    Parameters:
      first: The first element of the sequence.
      second: The second element of the sequence.

    Returns a generator that generates the sequence.
    """
    yield first

    # A single-element trace stops after the first seed.
    if self.length == 1:
        return

    yield second

    # Two terms are out already; keep going until `self.length` terms were produced.
    older, newer = first, second
    emitted = 2
    while emitted < self.length:
        emitted += 1
        older, newer = newer, older + newer
        yield newer
def _SequenceGenerator__fib_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.fib. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __fib_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that forwards a parameter dictionary to `self.__fib`.
    It exists so that every sequence key can be invoked through the same
    single-argument calling convention.

    Parameters:
      params: The parameter dictionary; its "first" and "second" entries are passed on.

    Returns the result of `self.__fib`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
    """
    # Look the keys up in signature order so a missing key fails on the same name.
    seeds = {"first": params["first"], "second": params["second"]}
    return self.__fib(**seeds)
def _SequenceGenerator__get_method(self, seq_name: str) ‑> Callable[[Dict[str, int]], Generator]

Gets the method reference for a particular sequence generator.

Parameters

seq_name: The name of the sequence generation method for which to retrieve a method reference.

Returns a method reference.

Expand source code
def __get_method(self, seq_name: str) -> Callable[[Dict[str, int]], Generator]:
    """
    Looks up the callable registered for a sequence generator in `self.config`.

    Parameters:
      seq_name: The name of the sequence generation method for which to retrieve a method reference.

    Returns the value stored under the "method" key of that config entry.
    """
    entry = self.config[seq_name]
    return entry["method"]
def _SequenceGenerator__get_params(self, seq_name: str) ‑> List[str]

Gets config parameters for a particular sequence generator.

Parameters

seq_name: The name of the sequence generation method for which to retrieve parameters.

Returns a list of parameters.

Expand source code
def __get_params(self, seq_name: str) -> List[str]:
    """
    Gets the configured parameter names for a particular sequence generator.

    Parameters:
      seq_name: The name of the sequence generation method for which to retrieve parameters.

    Returns a new list with every entry of the config's "parameters" value converted to `str`.
    """
    declared = self.config[seq_name]["parameters"]
    return list(map(str, declared))
def _SequenceGenerator__long_term_dependency(self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0) ‑> Generator

Generalization of Fibonacci sequence with increased dependency, where F(n) = F(n-1) + F(n-5). In other words, the 6th term is equal to the sum of the 5th and the 1st.

Expand source code
def __long_term_dependency(
    self,
    first: int = 0,
    second: int = 0,
    third: int = 0,
    fourth: int = 0,
    fifth: int = 0,
) -> Generator:
    """
    Generalization of Fibonacci sequence with increased dependency, where F(n) = F(n-1) + F(n-5).
    In other words, the 6th term is equal to the sum of the 5th and the 1st.

    Parameters:
      first: The first element of the sequence.
      second: The second element of the sequence.
      third: The third element of the sequence.
      fourth: The fourth element of the sequence.
      fifth: The fifth element of the sequence.

    Returns a generator that yields at most `self.length` terms.
    """
    seeds = [first, second, third, fourth, fifth]
    emitted = 0

    # Base cases: hand out the seeds, stopping early for short traces.
    for seed in seeds:
        if emitted >= self.length:
            return
        yield seed
        emitted += 1

    # Sliding window over the five most recent terms, oldest first:
    # window[0] is F(n-5) and window[-1] is F(n-1).
    window = list(seeds)
    while emitted < self.length:
        emitted += 1
        term = window[0] + window[-1]
        yield term
        window = window[1:] + [term]
def _SequenceGenerator__long_term_dependency_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.long_term_dependency. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __long_term_dependency_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that forwards a parameter dictionary to `self.__long_term_dependency`.
    It exists so that every sequence key can be invoked through the same
    single-argument calling convention.

    Parameters:
      params: The parameter dictionary; its "first" through "fifth" entries are passed on.

    Returns the result of `self.__long_term_dependency`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
    """
    # Keys are read in signature order so a missing key fails on the same name.
    names = ("first", "second", "third", "fourth", "fifth")
    forwarded = {name: params[name] for name in names}
    return self.__long_term_dependency(**forwarded)
def _SequenceGenerator__long_term_single_dependency(self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0, constant: int = 1) ‑> Generator

F(n) = F(n-5) * c.

Expand source code
def __long_term_single_dependency(
    self,
    first: int = 0,
    second: int = 0,
    third: int = 0,
    fourth: int = 0,
    fifth: int = 0,
    constant: int = 1,
) -> Generator:
    """
    F(n) = F(n-5) * c.

    Parameters:
      first: The first element of the sequence.
      second: The second element of the sequence.
      third: The third element of the sequence.
      fourth: The fourth element of the sequence.
      fifth: The fifth element of the sequence.
      constant: The factor `c` applied to the term five positions back.

    Returns a generator that yields at most `self.length` terms.
    """
    seeds = [first, second, third, fourth, fifth]
    emitted = 0

    # Base cases: hand out the seeds, stopping early for short traces.
    for seed in seeds:
        if emitted >= self.length:
            return
        yield seed
        emitted += 1

    # Sliding window over the five most recent terms, oldest first,
    # so window[0] is always F(n-5).
    window = list(seeds)
    while emitted < self.length:
        emitted += 1
        term = window[0] * constant
        yield term
        window = window[1:] + [term]
def _SequenceGenerator__long_term_single_dependency_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.__long_term_single_dependency. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __long_term_single_dependency_wrapper(
    self, params: Dict[str, int]
) -> Generator:
    """
    Adapter that forwards a parameter dictionary to `self.__long_term_single_dependency`.
    It exists so that every sequence key can be invoked through the same
    single-argument calling convention.

    Parameters:
      params: The parameter dictionary; its "first" through "fifth" and
                "constant" entries are passed on.

    Returns the result of `self.__long_term_single_dependency`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
    """
    # Keys are read in signature order so a missing key fails on the same name.
    names = ("first", "second", "third", "fourth", "fifth", "constant")
    forwarded = {name: params[name] for name in names}
    return self.__long_term_single_dependency(**forwarded)
def _SequenceGenerator__pascal(self, first: int = 1) ‑> Generator

Yield the first self.length numbers of the sequence defined by reading the pascal triangle from left to right, top to bottom, where the first integer is first (usually this is 1).

Parameters

first: The first integer on top of the triangle, and consequently the first integer in the sequence.

Returns a generator that generates the sequence.

Expand source code
def __pascal(self, first: int = 1) -> Generator:
    """
    Yield the first `self.length` numbers obtained by reading Pascal's triangle
    row by row (left to right, top to bottom), with `first` as the apex value.

    Parameters:
      first: The integer on top of the triangle, and consequently the first
               integer in the sequence. It is also used as the right edge of every row.

    Returns a generator that generates the sequence.
    """
    yield first

    # A single-element trace is just the apex.
    if self.length == 1:
        return

    # The apex has been emitted already.
    emitted = 1
    current_row = [first]

    while True:
        # Every entry is the sum of the entry above it and its left neighbour;
        # the leftmost entry has no left neighbour, hence the padding zero.
        left_neighbours = [0] + current_row[:-1]
        following_row = [
            left + above for left, above in zip(left_neighbours, current_row)
        ]
        # The right edge of each row is always the apex value.
        following_row.append(first)

        for entry in following_row:
            if emitted >= self.length:
                return
            yield entry
            emitted += 1

        current_row = following_row
def _SequenceGenerator__pascal_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.pascal. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __pascal_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that forwards a parameter dictionary to `self.__pascal`.
    It exists so that every sequence key can be invoked through the same
    single-argument calling convention.

    Parameters:
      params: The parameter dictionary; its "first" entry is passed on as `first`.

    Returns the result of `self.__pascal`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
    """
    apex = params["first"]
    return self.__pascal(first=apex)
def _SequenceGenerator__range_down(self, last: int = 0, step: int = 1) ‑> Generator

Simple range generator that counts down with stepsize step such that the last element will be last.

Parameters

last: The last element of the sequence. step: The stepsize passed to range

Returns a generator that generates the sequence.

Expand source code
def __range_down(self, last: int = 0, step: int = 1) -> Generator:
    """
    Simple range generator that counts down with stepsize `step`
    such that the last element will be `last`.

    Parameters:
      last: The last element of the sequence.
      step: The stepsize passed to range

    Returns a generator that yields `self.length` values, ending on `last`.
    """
    # The sequence takes `self.length - 1` steps from its first to its last value,
    # so the start must sit exactly that many steps above `last`.
    # (The earlier `last + step * self.length - 1` only ended on `last` for step == 1.)
    value = last + step * (self.length - 1)

    count = 0
    while count < self.length:
        yield value
        count += 1
        value -= step
def _SequenceGenerator__range_down_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.range_down. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __range_down_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that forwards a parameter dictionary to `self.__range_down`.
    It exists so that every sequence key can be invoked through the same
    single-argument calling convention.

    Parameters:
      params: The parameter dictionary; its "last" and "step" entries are passed on.

    Returns the result of `self.__range_down`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
    """
    # Look the keys up in signature order so a missing key fails on the same name.
    forwarded = {"last": params["last"], "step": params["step"]}
    return self.__range_down(**forwarded)
def _SequenceGenerator__range_up(self, first: int = 0, step: int = 1) ‑> Generator

Simple range generator that counts up.

Parameters

first: The first element of the sequence. step: The stepsize passed to range

Returns a generator that generates the sequence.

Expand source code
def __range_up(self, first: int = 0, step: int = 1) -> Generator:
    """
    Simple range generator that counts up from `first` in increments of `step`.

    Parameters:
      first: The first element of the sequence.
      step: The stepsize passed to range

    Returns a generator that yields `self.length` values.
    """
    value = first
    emitted = 0

    # Emit one value per iteration until the wanted amount is reached.
    while emitted < self.length:
        yield value
        emitted += 1
        value += step
def _SequenceGenerator__range_up_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.range_up. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __range_up_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that forwards a parameter dictionary to `self.__range_up`.
    It exists so that every sequence key can be invoked through the same
    single-argument calling convention.

    Parameters:
      params: The parameter dictionary; its "first" and "step" entries are passed on.

    Returns the result of `self.__range_up`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
    """
    # Look the keys up in signature order so a missing key fails on the same name.
    forwarded = {"first": params["first"], "step": params["step"]}
    return self.__range_up(**forwarded)
def _SequenceGenerator__recaman(self, first: int = 0) ‑> Generator

Generator for the Recaman's sequence, a well known sequence from the on-line encyclopedia of integer sequences. Available here.

Parameters

first: The first element of the sequence. The original sequence defines this as 0.

Returns a generator that generates the sequence.

Expand source code
def __recaman(self, first: int = 0) -> Generator:
    """
    Generator for the Recaman's sequence, a well known sequence
    from the on-line encyclopedia of integer sequences.
    Available [here](https://oeis.org/A005132).

    Parameters:
      first: The first element of the sequence.
               The original sequence defines this as 0.

    Returns a generator that generates the sequence.
    """
    # If for some reason you only want the first number?
    if self.length == 1:
        yield first
        return

    # Keep track of a count and already seen digits
    count = 0
    current = first
    already_seen = set([])

    def get_next(current: int, index: int) -> int:
        """
        Computes the next value in the sequence.

        Parameters:
          current: The current value.
          index: The index of the current value.

        Returns the new value.
        """
        # Compute a(n) = a(n-1) - n
        # if nonnegative and not in sequence, return
        new_number = current - index

        if (new_number < 0) | (new_number in already_seen):
            # Negative or already seen: add index in stead
            new_number = current + index

        return new_number

    while count < self.length:
        # Compute next term in the sequence
        new = get_next(current, count)

        # Save it to the set of already seen terms
        already_seen.add(new)

        # Set current to new value and increase count
        count += 1
        current = new

        # Finally, yield the correct value
        yield current
def _SequenceGenerator__recaman_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.recaman. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __recaman_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that forwards a parameter dictionary to `self.__recaman`.
    It exists so that every sequence key can be invoked through the same
    single-argument calling convention.

    Parameters:
      params: The parameter dictionary; its "first" entry is passed on as `first`.

    Returns the result of `self.__recaman`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
    """
    start = params["first"]
    return self.__recaman(first=start)
def _SequenceGenerator__short_term_single_dependency(self, first: int = 1, constant: int = 2) ‑> Generator

A short term dependency. F(n) = F(n-1) * c

Expand source code
def __short_term_single_dependency(
    self, first: int = 1, constant: int = 2
) -> Generator:
    """
    A short term dependency. F(n) = F(n-1) * c

    Parameters:
      first: The first element of the sequence.
      constant: The factor `c` applied to the previous term.

    Returns a generator that generates the sequence.
    """
    yield first

    # A single-element trace stops after the starting value.
    if self.length == 1:
        return

    # One term is out already; keep multiplying until `self.length` terms were produced.
    value = first
    emitted = 1
    while emitted < self.length:
        emitted += 1
        value = value * constant
        yield value
def _SequenceGenerator__short_term_single_dependency_wrapper(self, params: Dict[str, int]) ‑> Generator

Wrapper method for self.short_term_single_dependency. Written so we can have a unified interface to generate traces, given a sequence key.

Unsafe when used in any other place than the generation config dict SequenceGenerator.config.

Expand source code
def __short_term_single_dependency_wrapper(
    self, params: Dict[str, int]
) -> Generator:
    """
    Adapter that forwards a parameter dictionary to `self.__short_term_single_dependency`.
    It exists so that every sequence key can be invoked through the same
    single-argument calling convention.

    Parameters:
      params: The parameter dictionary; its "first" and "constant" entries are passed on.

    Returns the result of `self.__short_term_single_dependency`.

    **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`.
    """
    # Look the keys up in signature order so a missing key fails on the same name.
    forwarded = {"first": params["first"], "constant": params["constant"]}
    return self.__short_term_single_dependency(**forwarded)
def generate_log(self, seq_name: str, **kwargs: Any) ‑> List[Tuple[int, ...]]

Generates an entire log corresponding to some sequence.

Parameters

seq_name: The name of the sequence generation method for which to generate a log.

Raises a NotYetImplemented when the seq_name key does not correspond to a generator method. Raises a MissingRequiredParameter when a particular parameter was not provided.

Returns a log of traces as a list of tuples.

Expand source code
def generate_log(self, seq_name: str, **kwargs: Any) -> List[Tuple[int, ...]]:
    """
    Generates an entire log corresponding to some sequence.

    Parameters:
      seq_name: The name of the sequence generation method for which to generate a log.
                  Surrounding whitespace and letter case are ignored.
      kwargs: For every parameter the sequence requires, a keyword argument named
                after that parameter with an "s" appended (e.g. "firsts" for "first").
                The values are handed to `self.__build_param_matrix`, which produces
                one parameter set per trace.

    Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator method.
    Raises a `MissingRequiredParameter` when a particular parameter was not provided.

    Returns a log of traces as a list of tuples.
    """
    # Normalise once and use the normalised key for every lookup below. Previously
    # only the existence check was normalised, so a name such as " Fib " passed the
    # check and then failed with a KeyError on the config lookup.
    # NOTE(review): assumes config keys are stored stripped and lower-cased -- confirm.
    normalized_name = seq_name.strip().lower()

    try:
        check_item_list(normalized_name, self.get_generators())
    except MissingItem:
        # Report the name exactly as the caller supplied it.
        raise NotYetImplemented(seq_name)

    # Log-level keywords are the plural form of the per-trace parameter names.
    required_params = [param + "s" for param in self.__get_params(normalized_name)]
    self.__check_params(kwargs, required_params)
    self.__check_length_with_params(normalized_name)

    # Collect the traces in a list, one tuple per generated parameter set.
    log = []

    for params in self.__build_param_matrix(kwargs, required_params):
        trace = tuple(self.generate_trace(normalized_name, **params))
        log.append(trace)

    return log
def generate_trace(self, seq_name: str, **kwargs: Any) ‑> Generator

Generates a single trace corresponding to some sequence.

Parameters

seq_name: The name of the sequence generation method for which to generate a trace.

Raises a NotYetImplemented when the seq_name key does not correspond to a generator method. Raises a MissingRequiredParameter when a particular parameter was not provided.

Returns a generator for a particular sequence.

Expand source code
def generate_trace(self, seq_name: str, **kwargs: Any) -> Generator:
    """
    Generates a single trace corresponding to some sequence.

    Parameters:
      seq_name: The name of the sequence generation method for which to generate a trace.
                  Surrounding whitespace and letter case are ignored.
      kwargs: The parameters of the sequence; every name returned by
                `self.__get_params` for this sequence must be present.

    Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator method.
    Raises a `MissingRequiredParameter` when a particular parameter was not provided.

    Returns a generator for a particular sequence.
    """
    # Normalise once and use the normalised key for every lookup below. Previously
    # only the existence check was normalised, so a name such as " Fib " passed the
    # check and then failed with a KeyError on the config lookup.
    # NOTE(review): assumes config keys are stored stripped and lower-cased -- confirm.
    normalized_name = seq_name.strip().lower()

    try:
        check_item_list(normalized_name, self.get_generators())
    except MissingItem:
        # Report the name exactly as the caller supplied it.
        raise NotYetImplemented(seq_name)

    # It exists, check for param mismatch
    required_params = self.__get_params(normalized_name)

    self.__check_params(kwargs, required_params)

    # required (and possibly more) params present -- retrieve reference to generator
    method = self.__get_method(normalized_name)

    # build params to pass through
    method_params = self.__build_params(kwargs, required_params)

    # call the function, and return its result
    return method(method_params)
def get_generators(self) ‑> List[str]

Gets implemented generator functions.

Returns a list of the names of implemented generator functions.

Expand source code
def get_generators(self) -> List[str]:
    """
    Lists the sequence generators that are available.

    Returns a new list holding the keys of `self.config`.
    """
    return list(self.config.keys())
class XESTransformator

Transforms a process mining log into integer sequences. Can only handle extensions present in readable_exts.

Attributes

readable_exts – All extensions that this transformator can handle. Initialises the XESTransformator class.

Expand source code
class XESTransformator:
    """
    Transforms a process mining log into integer sequences.
    Can only handle extensions present in `readable_exts`.

    Attributes:
      readable_exts -- All extensions that this transformator can handle.
    """

    # Class Methods
    def __init__(self) -> None:
        """
        Initialises the XESTransformator class.
        """
        self.readable_exts = [".xes", ".xes.gz"]
        """All extensions that this transformator can handle."""

    def __repr__(self) -> str:
        """
        Returns a debug representation listing the readable extensions.
        """
        return f"XESTransformator(readable_exts={self.readable_exts})"

    # Helper methods
    def __check_log(self, file: str) -> None:
        """
        Performs some checks on the file and
        raises exceptions when something is wrong.

        In particular:
        Check 0: Check if it is a file.
        Check 1: Check if the file is readable.
        Check 2: Check if the extension is one that we can parse.

        Parameters:
          file -- The file (as string) to check. Can be relative or absolute path.

        Raises `FileNotFoundError` if check 0 fails.
        Raises `PermissionError` if check 1 fails.
        Raises `InvalidLogFormat` if check 2 fails.
        """
        # Check 0: Did we get a file?
        if not isfile(file):
            raise FileNotFoundError(file)

        # Check 1: Can we read the file?
        if not access(file, R_OK):
            raise PermissionError(file)

        # Check 2: Does the path END with one of the readable extensions?
        # A substring test would also accept names such as "log.xes.bak"
        # or any path with ".xes" somewhere in a directory name.
        if not file.endswith(tuple(self.readable_exts)):
            raise InvalidLogFormat(filepath=file)

    def __build_mapping(self, root: ET.Element, file: str) -> Dict[str, int]:
        """
        Builds a mapping from key (XES concept:name) to integer.

        The integers are the positions of the entries below the
        'meta_concept:named_events_total' element, so they start at 1
        (position 0 is that element itself and is skipped).

        Parameters:
          root -- The root element (`<log>`) of an XES log file.
          file -- Path to the file, used for reporting errors.

        Raises `InvalidElementPassed` if `root` is not a log element.
        Raises `ParsingError` if the log has no 'meta_concept:named_events_total' element.

        Returns a dictionary mapping a string (key) to an integer.
        """
        # Check element tag
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)

        # Retrieve element by XPath
        dictionary_root_field = root.find(
            ".//*[@key='meta_concept:named_events_total']"
        )

        if dictionary_root_field is None:
            raise ParsingError(
                filepath=file,
                # The two parts are joined with a space; it was missing before
                # and produced "is notpresent".
                reason="'meta_concept:named_events_total' is not "
                + "present in the log. Does your log adhere to the OpenXES standard?",
            )

        # Initialise the mapping as empty dictionary
        mapping = dict()

        # Loop through all elements
        for index, element in enumerate(dictionary_root_field.iter()):
            # Skip the root item itself (iter() yields it first).
            if element is not dictionary_root_field:
                # Retrieve the key from element attributes
                key = element.attrib["key"]

                # Set the mapping entry to an integer (index is used)
                mapping[key] = index

        return mapping

    def __get_all_traces(self, root: ET.Element) -> List[ET.Element]:
        """
        Given the root element (should be log), return a list of all trace elements.

        Parameters:
          root -- The root element (`<log>`) of an XES log file.

        Raises `InvalidElementPassed` if `root` is not a log element.

        Returns a list of trace elements (`<trace>`).
        """
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)

        # Substring match on the tag so namespaced tags are matched as well.
        return [elem for elem in root.iter() if "trace" in elem.tag]

    def __get_all_events(self, root: ET.Element) -> List[ET.Element]:
        """
        Given the root element (should be trace), return a list of all event elements.

        Parameters:
          root -- Any trace element (`<trace>`) of an XES log file.

        Raises `InvalidElementPassed` if `root` is not a trace element.

        Returns a list of event elements (`<event>`).
        """
        if "trace" not in root.tag:
            raise InvalidElementPassed(expected="trace", element=root.tag)

        # Substring match on the tag so namespaced tags are matched as well.
        return [elem for elem in root.iter() if "event" in elem.tag]

    # Parsing methods
    def __parse_with_ET(self, file: str, gzipped: bool = False) -> ET.ElementTree:
        """
        Parse the XML with ElementTree
        Distinguishes between gzipped and normal format.

        Parameters:
          file -- The file to parse.
          gzipped -- Boolean indicating whether or not the file is gzipped.
                     False by default.

        Raises `ParsingError` when ElementTree cannot parse the content.

        Returns the parsed element tree.
        """
        try:
            if gzipped:
                with gzip.open(file) as unzipped_file:
                    return ET.parse(unzipped_file)
            return ET.parse(file)
        except ET.ParseError as err:
            # Keep the original parse error attached as the explicit cause.
            raise ParsingError(
                filepath=file, reason="Element Tree ParseError was raised."
            ) from err

    def __parse(self, file: str) -> ET.Element:
        """
        Parses a file (that is either .xes or .xes.gz) with ElementTree and returns the root element.

        Parameters:
          file -- The file to parse.

        Returns the root element (`<log>`) of an XES log.
        """
        # Only the file's own suffix decides whether it is gzipped; a ".gz"
        # elsewhere in the path (e.g. in a directory name) must not count.
        tree = self.__parse_with_ET(file, file.endswith(".gz"))
        return tree.getroot()

    # Transforming methods
    def __convert_trace(
        self, trace: ET.Element, mapping: Dict[str, int], file: str
    ) -> Tuple[int, ...]:
        """
        Converts a single trace element into a tuple of integers.

        Parameters:
          trace -- The trace element (`<trace>`) to convert.
          mapping -- The mapping that defines how to convert.
          file -- Path to file, used for logging.

        Raises `InvalidElementPassed` if `trace` is not a trace element.
        Raises `ParsingError` if an event has no 'concept:name' entry.

        Returns a tuple of integers representing a trace according to some mapping.
        """

        # Check element tag
        if "trace" not in trace.tag:
            raise InvalidElementPassed(expected="trace", element=trace.tag)

        converted = []
        for event in self.__get_all_events(trace):
            key_element = event.find(".//*[@key='concept:name']")

            # Explicit not None check -> Ensures that key element is of type Element.
            if key_element is not None:
                key_itself = key_element.attrib["value"]
                converted.append(mapping[key_itself])
            else:
                # Got None, cannot process this file.
                raise ParsingError(
                    filepath=file,
                    reason="Cannot find key elements while transforming traces.",
                )

        return tuple(converted)

    def __make_log(
        self, root: ET.Element, mapping: Dict[str, int], file: str
    ) -> List[Tuple[int, ...]]:
        """
        Makes a log, given a root element and a mapping dictionary.

        Parameters:
          root -- the root element (`<log>`) of an XES log.
          mapping -- a mapping from key (XES concept:name) to integer.
          file -- Path to file, used for logging.

        Raises `InvalidElementPassed` if `root` is not a log element.

        Returns a transformed log.
        """

        # Check element tag
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)

        # Convert every trace in document order.
        return [
            self.__convert_trace(trace, mapping, file)
            for trace in self.__get_all_traces(root)
        ]

    def transform(self, log: str) -> List[Tuple[int, ...]]:
        """
        Transforms a XES log into integer sequences.

        Parameters:
          log -- A logfile to transform to integer sequences.

        Returns a transformed log.
        """
        # Check the log
        self.__check_log(log)

        # Parse the log if possible
        root = self.__parse(log)

        # Build the name mapping
        name_mapping = self.__build_mapping(root, log)

        # Build the log
        return self.__make_log(root, name_mapping, log)

Instance variables

var readable_exts

All extensions that this transformator can handle.

Methods

def _XESTransformator__build_mapping(self, root: xml.etree.ElementTree.Element, file: str) ‑> Dict[str, int]

Builds a mapping from key (XES concept:name) to integer.

Parameters

root – The root element (<log>) of an XES log file. file – Path to the file, used for reporting errors.

Returns a dictionary mapping a string (key) to an integer.

Expand source code
def __build_mapping(self, root: ET.Element, file: str) -> Dict[str, int]:
    """
    Builds a mapping from key (XES concept:name) to integer.

    The integers are the positions of the entries below the
    'meta_concept:named_events_total' element, so they start at 1
    (position 0 is that element itself and is skipped).

    Parameters:
      root -- The root element (`<log>`) of an XES log file.
      file -- Path to the file, used for reporting errors.

    Raises `InvalidElementPassed` if `root` is not a log element.
    Raises `ParsingError` if the log has no 'meta_concept:named_events_total' element.

    Returns a dictionary mapping a string (key) to an integer.
    """
    # Check element tag
    if "log" not in root.tag:
        raise InvalidElementPassed(expected="log", element=root.tag)

    # Retrieve element by XPath
    dictionary_root_field = root.find(
        ".//*[@key='meta_concept:named_events_total']"
    )

    if dictionary_root_field is None:
        raise ParsingError(
            filepath=file,
            # The two parts are joined with a space; it was missing before
            # and produced "is notpresent".
            reason="'meta_concept:named_events_total' is not "
            + "present in the log. Does your log adhere to the OpenXES standard?",
        )

    # Initialise the mapping as empty dictionary
    mapping = dict()

    # Loop through all elements
    for index, element in enumerate(dictionary_root_field.iter()):
        # Skip the root item itself (iter() yields it first).
        if element is not dictionary_root_field:
            # Retrieve the key from element attributes
            key = element.attrib["key"]

            # Set the mapping entry to an integer (index is used)
            mapping[key] = index

    return mapping
def _XESTransformator__check_log(self, file: str) ‑> NoneType

Performs some checks on the file and raises exceptions when something is wrong.

In particular: Check 0: Check if it is a file. Check 1: Check if the file is readable. Check 2: Check if the extension is one that we can parse.

Parameters

file – The file (as string) to check. Can be relative or absolute path.

Raises FileNotFoundError if check 0 fails. Raises PermissionError if check 1 fails. Raises InvalidLogFormat if check 2 fails.

Expand source code
def __check_log(self, file: str) -> None:
    """
    Validates that a file can be handed to the parser and
    raises an exception as soon as one of the checks fails.

    The checks, in order:
    Check 0: The path points to a file.
    Check 1: The file is readable.
    Check 2: The path contains one of the extensions in `readable_exts`.

    Parameters:
      file -- The file (as string) to check. Can be relative or absolute path.

    Raises `FileNotFoundError` if check 0 fails.
    Raises `PermissionError` if check 1 fails.
    Raises `InvalidLogFormat` if check 2 fails.
    """
    # Check 0: Did we get a file?
    if not isfile(file):
        raise FileNotFoundError(file)

    # Check 1: Can we read the file?
    if not access(file, R_OK):
        raise PermissionError(file)

    # Check 2: Does the path mention one of the readable extensions?
    # NOTE(review): this is a substring test on the whole path, not a suffix
    # test -- confirm that is intended before tightening it.
    for ext in self.readable_exts:
        if ext in file:
            return

    raise InvalidLogFormat(filepath=file)
def _XESTransformator__convert_trace(self, trace: xml.etree.ElementTree.Element, mapping: Dict[str, int], file: str) ‑> Tuple[int, ...]

Converts a single trace element into a tuple of integers.

Parameters

trace – The trace element (<trace>) to convert. mapping – The mapping that defines how to convert. file – Path to file, used for logging.

Returns a tuple of integers representing a trace according to some mapping.

Expand source code
def __convert_trace(
    self, trace: ET.Element, mapping: Dict[str, int], file: str
) -> Tuple[int, ...]:
    """
    Translates one trace element into a tuple of integers.

    For every event of the trace, the `value` attribute of the event's
    `concept:name` element is looked up in `mapping`.

    Parameters:
      trace -- The trace element (`<trace>`) to convert.
      mapping -- The mapping that defines how to convert.
      file -- Path to file, used for logging.

    Returns a tuple of integers representing a trace according to some mapping.

    Raises `InvalidElementPassed` if the tag of `trace` does not contain "trace".
    Raises `ParsingError` if an event has no `concept:name` element.
    """
    # Check element tag
    if "trace" not in trace.tag:
        raise InvalidElementPassed(expected="trace", element=trace.tag)

    integers = []
    for event in self.__get_all_events(trace):
        name_element = event.find(".//*[@key='concept:name']")

        # Without a concept:name element the event cannot be translated.
        if name_element is None:
            raise ParsingError(
                filepath=file,
                reason="Cannot find key elements while transforming traces.",
            )

        event_name = name_element.attrib["value"]
        integers.append(mapping[event_name])

    return tuple(integers)
def _XESTransformator__get_all_events(self, root: xml.etree.ElementTree.Element) ‑> List[xml.etree.ElementTree.Element]

Given the root element (should be trace), return a list of all event elements.

Parameters

root – Any trace element (<trace>) of an XES log file.

Returns a list of event elements (<event>).

Expand source code
def __get_all_events(self, root: ET.Element) -> List[ET.Element]:
    """
    Collects the event elements found in a trace.

    An element counts as an event when its tag contains "event"; the
    containment test also matches namespace-qualified tags.

    Parameters:
      root -- Any trace element (`<trace>`) of an XES log file.

    Returns a list of event elements (`<event>`).

    Raises `InvalidElementPassed` if the tag of `root` does not contain "trace".
    """
    if "trace" not in root.tag:
        raise InvalidElementPassed(expected="trace", element=root.tag)

    # iter() walks `root` itself and everything below it.
    candidates = root.iter()
    return [node for node in candidates if "event" in node.tag]
def _XESTransformator__get_all_traces(self, root: xml.etree.ElementTree.Element) ‑> List[xml.etree.ElementTree.Element]

Given the root element (should be log), return a list of all trace elements.

Parameters

root – The root element (<log>) of an XES log file.

Returns a list of trace elements (<trace>).

Expand source code
def __get_all_traces(self, root: ET.Element) -> List[ET.Element]:
    """
    Collects the trace elements found in a log.

    An element counts as a trace when its tag contains "trace"; the
    containment test also matches namespace-qualified tags.

    Parameters:
      root -- The root element (`<log>`) of an XES log file.

    Returns a list of trace elements (`<trace>`).

    Raises `InvalidElementPassed` if the tag of `root` does not contain "log".
    """
    if "log" not in root.tag:
        raise InvalidElementPassed(expected="log", element=root.tag)

    # iter() walks `root` itself and everything below it.
    candidates = root.iter()
    return [node for node in candidates if "trace" in node.tag]
def _XESTransformator__make_log(self, root: xml.etree.ElementTree.Element, mapping: Dict[str, int], file: str) ‑> List[Tuple[int, ...]]

Makes a log, given a root element and a mapping dictionary.

Parameters

root – the root element (<log>) of an XES log. mapping – a mapping from key (XES concept:name) to integer. file – Path to file, used for logging.

Returns a transformed log.

Expand source code
def __make_log(
    self, root: ET.Element, mapping: Dict[str, int], file: str
) -> List[Tuple[int, ...]]:
    """
    Builds the transformed log by converting every trace of an XES log.

    Parameters:
      root -- the root element (`<log>`) of an XES log.
      mapping -- a mapping from key (XES concept:name) to integer.
      file -- Path to file, used for logging.

    Returns a transformed log: one tuple of integers per trace.

    Raises `InvalidElementPassed` if the tag of `root` does not contain "log".
    """
    # Check element tag
    if "log" not in root.tag:
        raise InvalidElementPassed(expected="log", element=root.tag)

    # Convert the traces one by one, keeping their order
    return [
        self.__convert_trace(single_trace, mapping, file)
        for single_trace in self.__get_all_traces(root)
    ]
def _XESTransformator__parse(self, file: str) ‑> xml.etree.ElementTree.Element

Parses a file (that is either .xes or .xes.gz) with ElementTree and returns the root element.

Parameters

file – The file to parse.

Returns the root element (<log>) of an XES log.

Expand source code
def __parse(self, file: str) -> ET.Element:
    """
    Parses a file (that is either .xes or .xes.gz) with ElementTree and returns the root element.

    The file is treated as gzipped only when its path ends in ".gz".

    Parameters:
      file -- The file to parse.

    Returns the root element (`<log>`) of an XES log.
    """
    # Look at the suffix only: a ".gz" elsewhere in the path (for example in a
    # directory name) says nothing about the file itself.
    is_gzipped = file.endswith(".gz")
    tree = self.__parse_with_ET(file, is_gzipped)
    return tree.getroot()
def _XESTransformator__parse_with_ET(self, file: str, gzipped: bool = False) ‑> xml.etree.ElementTree.ElementTree

Parse the XML with ElementTree. Distinguishes between gzipped and normal format.

Parameters

file – The file to parse. gzipped – Boolean indicating whether or not the file is gzipped. False by default.

Expand source code
def __parse_with_ET(self, file: str, gzipped: bool = False) -> ET.ElementTree:
    """
    Parse the XML with ElementTree.
    Distinguishes between gzipped and normal format.

    Parameters:
      file -- The file to parse.
      gzipped -- Boolean indicating whether or not the file is gzipped.
                 False by default.

    Returns the parsed `ElementTree`.

    Raises `ParsingError` if ElementTree raises a `ParseError`; the original
    error is kept as the cause.
    """
    try:
        if gzipped:
            # Decompress on the fly and parse from the open file object.
            with gzip.open(file) as unzipped_file:
                return ET.parse(unzipped_file)
        return ET.parse(file)
    except ET.ParseError as error:
        # Chain explicitly so the traceback shows where the XML is malformed.
        raise ParsingError(
            filepath=file, reason="Element Tree ParseError was raised."
        ) from error
def transform(self, log: str) ‑> List[Tuple[int, ...]]

Transforms a XES log into integer sequences.

Parameters

log – A logfile to transform to integer sequences.

Returns a transformed log.

Expand source code
def transform(self, log: str) -> List[Tuple[int, ...]]:
    """
    Transforms a XES log into integer sequences.

    The file is checked first, then parsed; the name mapping built from the
    parsed log is used to convert its traces.

    Parameters:
      log -- A logfile to transform to integer sequences.

    Returns a transformed log.
    """
    # Validate the file before touching its contents
    self.__check_log(log)

    # Read the XML and derive the name -> integer mapping from it
    log_element = self.__parse(log)
    event_numbers = self.__build_mapping(log_element, log)

    # Convert the traces with that mapping
    return self.__make_log(log_element, event_numbers, log)