Module integer_sequences.generator
Expand source code
from generator.XESTransformator import XESTransformator
from generator.SequenceGenerator import SequenceGenerator
from inspect import getmembers
__all__ = ["SequenceGenerator", "XESTransformator"]
# pdoc hides underscore-prefixed members by default. Opt every private member
# of the exported classes back in, while leaving names that end in an
# underscore (dunder methods such as __init__ / __class__) hidden.
__pdoc__ = {}
for cls in (XESTransformator, SequenceGenerator):
    for name, _member in getmembers(cls):
        is_private = name.startswith("_") and not name.endswith("_")
        if is_private:
            __pdoc__[f"{cls.__name__}.{name}"] = True
Classes
class SequenceGenerator (wanted_length: int = 10)
-
Generates a log of traces that originate from the same generating distribution / method.
Attributes
length
- The amount of items to generate.
config
- A config object that holds, per implemented method, a list of required parameters and a reference to the method.
Initializes the SequenceGenerator class.
Parameters
wanted_length: The desired amount of items to generate. 10 by default.
Expand source code
class SequenceGenerator:
    """
    Generates a log of traces that originate from the same generating distribution / method.

    Attributes:
        length: The amount of items to generate.
        config: A config object that holds, per implemented method, a list of required parameters
                and a reference to the method.
    """

    # Class Methods
    def __init__(self, wanted_length: int = 10):
        """
        Initializes the SequenceGenerator class.

        Parameters:
            wanted_length: The desired amount of items to generate. 10 by default.

        Raises an `InvalidLengthException` when `wanted_length` is zero or negative.
        """
        # Test for invalid lengths
        if wanted_length <= 0:
            raise InvalidLengthException(wanted_length)

        # At this point we know that length is at least 1.
        self.length: int = wanted_length
        """The amount of items to generate."""

        self.config: Dict[str, Dict[str, Any]] = {
            # fib = short term, multiple dependencies
            "fib": {"parameters": ["first", "second"], "method": self.__fib_wrapper},
            "pascal": {"parameters": ["first"], "method": self.__pascal_wrapper},
            "recaman": {"parameters": ["first"], "method": self.__recaman_wrapper},
            "catalan": {"parameters": ["first"], "method": self.__catalan_wrapper},
            "range_up": {
                "parameters": ["first", "step"],
                "method": self.__range_up_wrapper,
            },
            "range_down": {
                "parameters": ["last", "step"],
                "method": self.__range_down_wrapper,
            },
            # long term, multiple dependency
            "long_term_dependency": {
                "parameters": ["first", "second", "third", "fourth", "fifth"],
                "method": self.__long_term_dependency_wrapper,
            },
            # long term, singular dependency
            "long_term_single_dependency": {
                "parameters": [
                    "first",
                    "second",
                    "third",
                    "fourth",
                    "fifth",
                    "constant",
                ],
                "method": self.__long_term_single_dependency_wrapper,
            },
            # short term, singular dependency
            "short_term_single_dependency": {
                "parameters": ["first", "constant"],
                "method": self.__short_term_single_dependency_wrapper,
            },
        }
        """A config object that holds, per implemented method, a list of required parameters and a reference to the method."""

    def __repr__(self) -> str:
        return f"SequenceGenerator(length={self.length}, implemented_generators={self.get_generators()})"

    # Generator methods
    def __fib(self, first: int = 1, second: int = 1) -> Generator:
        """
        Yield the first `self.length` numbers of the Fibonacci sequence where
        the first term is `first` and the second term is `second`.

        Parameters:
            first: The first element of the sequence.
            second: The second element of the sequence.

        Returns a generator that generates the sequence.
        """
        yield first
        # A single-element trace stops after the first seed.
        if self.length == 1:
            return
        yield second

        previous, latest = first, second
        for _ in range(self.length - 2):
            previous, latest = latest, previous + latest
            yield latest

    def __fib_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.fib`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__fib(first=params["first"], second=params["second"])

    def __pascal(self, first: int = 1) -> Generator:
        """
        Yield the first `self.length` numbers of the sequence defined by reading
        the pascal triangle from left to right, top to bottom,
        where the first integer is `first` (usually this is 1).

        Parameters:
            first: The first integer on top of the triangle, and consequently
                   the first integer in the sequence.

        Returns a generator that generates the sequence.
        """
        yield first
        # A single-element trace stops after the top of the triangle.
        if self.length == 1:
            return

        emitted = 1
        row = [first]
        while True:
            # Every inner entry is the sum of the two entries above it;
            # both edges of every row are `first`.
            row = [first] + [left + right for left, right in zip(row, row[1:])] + [first]
            for entry in row:
                if emitted >= self.length:
                    return
                yield entry
                emitted += 1

    def __pascal_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.pascal`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__pascal(first=params["first"])

    def __recaman(self, first: int = 0) -> Generator:
        """
        Generator for the Recaman's sequence, a well known sequence from
        the on-line encyclopedia of integer sequences.
        Available [here](https://oeis.org/A005132).

        Parameters:
            first: The first element of the sequence. The original sequence defines this as 0.

        Returns a generator that generates the sequence.
        """
        already_seen = set()
        current = first
        for index in range(self.length):
            # a(n) = a(n-1) - n when that is nonnegative and not yet in the
            # sequence, otherwise a(n) = a(n-1) + n. Index 0 reproduces `first`.
            candidate = current - index
            if candidate < 0 or candidate in already_seen:
                candidate = current + index
            already_seen.add(candidate)
            current = candidate
            yield current

    def __recaman_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.recaman`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__recaman(first=params["first"])

    def __catalan(self, first: int = 1) -> Generator:
        """
        Generates the Catalan numbers, where the first integer is parametrised.
        The catalan sequence is available [here](https://oeis.org/A000108).

        Implemented using dynamic programming as the direct formula has issues with `n > 30`
        In particular, 14544636039226909 became 14544636039226908 and all subsequent values were off.

        Parameters:
            first: The first element of the sequence. The original sequence defines this as 1.

        Returns a generator that generates the sequence.
        """
        # The first two terms are both seeded with `first`; only as many terms
        # as will actually be yielded are computed.
        table = [first] * min(self.length, 2)
        for i in range(2, self.length):
            # C(i) = sum over j < i of C(j) * C(i - 1 - j)
            table.append(sum(table[j] * table[i - j - 1] for j in range(i)))
        yield from table

    def __catalan_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.catalan`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__catalan(first=params["first"])

    def __range_up(self, first: int = 0, step: int = 1) -> Generator:
        """
        Simple range generator that counts up.

        Parameters:
            first: The first element of the sequence.
            step: The stepsize between two consecutive elements.

        Returns a generator that generates the sequence.
        """
        value = first
        for _ in range(self.length):
            yield value
            value += step

    def __range_up_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.range_up`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__range_up(first=params["first"], step=params["step"])

    def __range_down(self, last: int = 0, step: int = 1) -> Generator:
        """
        Simple range generator that counts down with stepsize `step`
        such that the last element will be `last`.

        Parameters:
            last: The last element of the sequence.
            step: The stepsize between two consecutive elements.

        Returns a generator that generates the sequence.
        """
        # `self.length` elements are separated by `self.length - 1` steps, so
        # start that many steps above `last` in order to end exactly on it.
        value = last + step * (self.length - 1)
        for _ in range(self.length):
            yield value
            value -= step

    def __range_down_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.range_down`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__range_down(last=params["last"], step=params["step"])

    def __long_term_dependency(
        self,
        first: int = 0,
        second: int = 0,
        third: int = 0,
        fourth: int = 0,
        fifth: int = 0,
    ) -> Generator:
        """
        Generalization of Fibonacci sequence with increased dependency,
        where F(n) = F(n-1) + F(n-5).
        In other words, the 6th term is equal to the sum of the 5th and the 1st.
        """
        # Sliding window over the five most recent terms, oldest first.
        window = [first, second, third, fourth, fifth]
        emitted = 0

        # Base cases: hand out the seeds, stopping early for short traces.
        for seed in window:
            if emitted >= self.length:
                return
            yield seed
            emitted += 1

        while emitted < self.length:
            term = window[0] + window[-1]
            yield term
            window = window[1:] + [term]
            emitted += 1

    def __long_term_dependency_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.long_term_dependency`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__long_term_dependency(
            first=params["first"],
            second=params["second"],
            third=params["third"],
            fourth=params["fourth"],
            fifth=params["fifth"],
        )

    def __long_term_single_dependency(
        self,
        first: int = 0,
        second: int = 0,
        third: int = 0,
        fourth: int = 0,
        fifth: int = 0,
        constant: int = 1,
    ) -> Generator:
        """
        F(n) = F(n-5) * c.
        """
        # Sliding window over the five most recent terms, oldest first.
        window = [first, second, third, fourth, fifth]
        emitted = 0

        # Base cases: hand out the seeds, stopping early for short traces.
        for seed in window:
            if emitted >= self.length:
                return
            yield seed
            emitted += 1

        while emitted < self.length:
            term = window[0] * constant
            yield term
            window = window[1:] + [term]
            emitted += 1

    def __long_term_single_dependency_wrapper(
        self, params: Dict[str, int]
    ) -> Generator:
        """
        Wrapper method for `self.__long_term_single_dependency`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__long_term_single_dependency(
            first=params["first"],
            second=params["second"],
            third=params["third"],
            fourth=params["fourth"],
            fifth=params["fifth"],
            constant=params["constant"],
        )

    def __short_term_single_dependency(
        self, first: int = 1, constant: int = 2
    ) -> Generator:
        """
        A short term dependency. F(n) = F(n-1) * c
        """
        value = first
        yield value
        for _ in range(self.length - 1):
            value = value * constant
            yield value

    def __short_term_single_dependency_wrapper(
        self, params: Dict[str, int]
    ) -> Generator:
        """
        Wrapper method for `self.short_term_single_dependency`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__short_term_single_dependency(
            first=params["first"], constant=params["constant"]
        )

    # Private getters
    def __get_params(self, seq_name: str) -> List[str]:
        """
        Gets config parameters for a particular sequence generator.

        Parameters:
            seq_name: The name of the sequence generation method for which to retrieve parameters.

        Returns a list of parameters.
        """
        return [str(item) for item in self.config[seq_name]["parameters"]]

    def __get_method(self, seq_name: str) -> Callable[[Dict[str, int]], Generator]:
        """
        Gets the method reference for a particular sequence generator.

        Parameters:
            seq_name: The name of the sequence generation method for which to retrieve
                      a method reference.

        Returns a method reference.
        """
        return self.config[seq_name]["method"]

    # Helper methods
    def __check_params(self, given: Dict[str, Any], required: List[str]) -> None:
        """
        Checks correctness of supplied parameters to `self.generate_trace` or `self.generate_log`.

        Parameters:
            given: The given dictionary.
            required : The required items.

        Raises a ``MissingRequiredParameter`` when something that was required wasn't there.
        """
        missing = [param for param in required if param not in given]
        if missing:
            raise MissingRequiredParameter(missing)

    def __build_params(
        self, given: Dict[str, Any], required: List[Any]
    ) -> Dict[str, Any]:
        """
        Builds a keyword-argument dictionary given the parameters in
        `self.generate_trace` or `self.generate_log`.

        Parameters:
            given: The given dictionary.
            required : The required items.

        Returns a dictionary of the form:
        ```
        {
            "param1" : value,
            "param2" : value,
            ...
        }
        ```
        """
        return {
            required_parameter: given[required_parameter]
            for required_parameter in required
        }

    def __build_param_matrix(
        self, givens: Dict[str, Any], requireds: List[str]
    ) -> List[Dict[str, int]]:
        """
        Builds parameter list for usage in `self.generate_log`.

        Takes, for every key in `requireds`, the list of candidate values found in
        `givens` and returns one dictionary per combination (Cartesian product) of
        those values. The last character of every key (the plural "s" that
        `self.generate_log` appends) is dropped in the result, e.g.
        ```
        {"firsts" : [1, 2], "seconds" : [3]}
        ```
        becomes
        ```
        [{"first" : 1, "second" : 3}, {"first" : 2, "second" : 3}]
        ```

        Parameters:
            givens: The given dictionary, mapping pluralised parameter names to lists of values.
            requireds : The required (pluralised) parameter names.

        Returns a list of dictionaries as listed above.
        """
        array_dict = self.__build_params(givens, requireds)
        # The singular key names do not depend on the combination: compute them once.
        keys = [key[:-1] for key in array_dict]
        return [
            dict(zip(keys, combination))
            for combination in itertools.product(*array_dict.values())
        ]

    def __check_length_with_params(self, seq_name: str) -> None:
        """
        Checks whether or not we can mathematically generate a trace of length `self.length`
        given a particular generator, identified by `seq_name`.

        Parameters:
            seq_name: The name of the sequence generation method for which to perform this check.

        Raises an `InvalidLengthException` when a sequence cannot be generated due to
        mismatch of required params and wanted length.
        """
        min_len_for_method = len(self.__get_params(seq_name))
        if min_len_for_method > self.length:
            # The `%s` placeholder is left untouched for the exception class.
            # NOTE(review): presumably InvalidLengthException interpolates
            # `length` into it -- confirm against its definition.
            raise InvalidLengthException(
                length=min_len_for_method,
                message=(
                    f"Cannot generate sequence of length {self.length} "
                    "if a method needs a minimum of %s parameters"
                ),
            )

    # Public methods
    def get_generators(self) -> List[str]:
        """
        Gets implemented generator functions.

        Returns a list of the names of implemented generator functions.
        """
        return list(self.config)

    def generate_trace(self, seq_name: str, **kwargs: Any) -> Generator:
        """
        Generates a single trace corresponding to some sequence.

        Parameters:
            seq_name: The name of the sequence generation method for which to generate a trace.
                      Surrounding whitespace and letter case are ignored.

        Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator method.
        Raises a `MissingRequiredParameter` when a particular parameter was not provided.

        Returns a generator for a particular sequence.
        """
        # Normalise once and use the normalised key for every lookup, so a name
        # that passes validation can never miss in `self.config`.
        key = seq_name.strip().lower()
        try:
            check_item_list(key, self.get_generators())
        except MissingItem as err:
            raise NotYetImplemented(seq_name) from err

        # It exists, check for param mismatch
        required_params = self.__get_params(key)
        self.__check_params(kwargs, required_params)

        # required (and possibly more) params present -- retrieve reference to generator
        method = self.__get_method(key)
        # build params to pass through
        method_params = self.__build_params(kwargs, required_params)
        # call the function, and return its result
        return method(method_params)

    def generate_log(self, seq_name: str, **kwargs: Any) -> List[Tuple[int, ...]]:
        """
        Generates an entire log corresponding to some sequence.

        Every keyword argument is the pluralised name of a generator parameter
        (e.g. `firsts=[1, 2]`) holding the list of values to combine.

        Parameters:
            seq_name: The name of the sequence generation method for which to generate a log.
                      Surrounding whitespace and letter case are ignored.

        Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator method.
        Raises a `MissingRequiredParameter` when a particular parameter was not provided.
        Raises an `InvalidLengthException` when the generator needs more seeds than `self.length`.

        Returns a log of traces as a list of tuples.
        """
        key = seq_name.strip().lower()
        try:
            check_item_list(key, self.get_generators())
        except MissingItem as err:
            raise NotYetImplemented(seq_name) from err

        required_params = [param + "s" for param in self.__get_params(key)]
        self.__check_params(kwargs, required_params)
        self.__check_length_with_params(key)

        # One trace per combination of the supplied parameter values.
        return [
            tuple(self.generate_trace(key, **params))
            for params in self.__build_param_matrix(kwargs, required_params)
        ]
Instance variables
var config
-
A config object that holds, per implemented method, a list of required parameters and a reference to the method.
var length
-
The amount of items to generate.
Methods
def _SequenceGenerator__build_param_matrix(self, givens: Dict[str, Any], requireds: List[str]) ‑> List[Dict[str, int]]
-
Builds parameter list for usage in
self.generate_log
.Transforms a dictionary of shape:
{ "required_param1" : [1, 2, ..], "required_param2" : [1, 2, 3, 4, ..], "required_param3" : [1, ..], .. }
to our wanted list of shape:
[ { "required_param1" : 1, "required_param2" : 1, "required_param3" : 1 }, ... { "required_param1" : 3, "required_param2" : 3, "required_param3" : 3 } ]
Parameters
givens: The given dictionary. requireds: The required items.
Returns a list of dictionaries as listed above.
Expand source code
def __build_param_matrix( self, givens: Dict[str, Any], requireds: List[str] ) -> List[Dict[str, int]]: """ Builds parameter list for usage in `self.generate_log`. Transforms a dictionary of shape: ``` { "required_param1" : [1, 2, ..], "required_param2" : [1, 2, 3, 4, ..], "required_param3" : [1, ..], .. } ``` to our wanted list of shape: ``` [ { "required_param1" : 1, "required_param2" : 1, "required_param3" : 1 }, ... { "required_param1" : 3, "required_param2" : 3, "required_param3" : 3 } ] ``` Parameters: given: The given dictionary. required : The required items. Returns a list of dictionaries as listed above. """ # Keep result variable result = [] # Create dictionary out of lists array_dict = self.__build_params(givens, requireds) # Compute the cartesian product using itertools compute_product_of_me = list(array_dict.values()) for tuple_of_vals in itertools.product(*compute_product_of_me): # tuple_of_vals: (a, b, c, ...) # len(item) == len(keys) # item[0] corresponds with keys[0] keys = [key[:-1] for key in array_dict.keys()] result.append({key: value for (key, value) in zip(keys, tuple_of_vals)}) return result
def _SequenceGenerator__build_params(self, given: Dict[str, Any], required: List[Any]) ‑> Dict[str, Any]
-
Builds a keyword-argument dictionary given the parameters in
self.generate_trace
or self.generate_log
.Parameters
given: The given dictionary. required : The required items.
Returns a dictionary of the form:
{ "param1" : value, "param2" : value, ... }
Expand source code
def __build_params( self, given: Dict[str, Any], required: List[Any] ) -> Dict[str, Any]: """ Builds a keyword-argument dictionary given the parameters in `self.generate_trace` or `self.generate_log`. Parameters: given: The given dictionary. required : The required items. Returns a dictionary of the form: ``` { "param1" : value, "param2" : value, ... } ``` """ return { required_parameter: given[required_parameter] for required_parameter in required }
def _SequenceGenerator__catalan(self, first: int = 1) ‑> Generator
-
Generates the Catalan numbers, where the first integer is parametrised. The catalan sequence is available here.
Implemented using dynamic programming as the direct formula has issues with
n > 30
In particular, 14544636039226909 became 14544636039226908 and all subsequent values were off.Parameters
first: The first element of the sequence. The original sequence defines this as 1.
Returns a generator that generates the sequence.
Expand source code
def __catalan(self, first: int = 1) -> Generator: """ Generates the Catalan numbers, where the first integer is parametrised. The catalan sequence is available [here](https://oeis.org/A000108). Implemented using dynamic programming as the direct formula has issues with `n > 30` In particular, 14544636039226909 became 14544636039226908 and all subsequent values were off. Parameters: first: The first element of the sequence. The original sequence defines this as 1. Returns a generator that generates the sequence. """ # Initialise dynamic programming table dp = [0] * (self.length + 1) dp[0] = first # in our case this is a parameter. By default it should be 1. dp[1] = first # in our case this is a parameter. By default it should be 1. # Fill the dp entries based on the recursive formula for i in range(2, self.length + 1): for j in range(i): dp[i] += dp[j] * dp[i - j - 1] # Loop through table, and yield the next item as long as the index does not exceed the length! for index in range(len(dp)): if index < self.length: yield dp[index]
def _SequenceGenerator__catalan_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.catalan
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __catalan_wrapper(self, params: Dict[str, int]) -> Generator: """ Wrapper method for `self.recaman`. Written so we can have a unified interface to generate traces, given a sequence key. **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`. """ return self.__catalan(first=params["first"])
def _SequenceGenerator__check_length_with_params(self, seq_name: str) ‑> NoneType
-
Checks whether or not we can mathematically generate a trace of length
self.length
given a particular generator, identified by seq_name
.Parameters
seq_name: The name of the sequence generation method for which to perform this check.
Raises an
InvalidLengthException
when a sequence cannot be generated due to mismatch of required params and wanted length.Expand source code
def __check_length_with_params(self, seq_name: str) -> None: """ Checks whether or not we can mathematically generate a trace of length `self.length` given a particular generator, identified by `seq_name`. Parameters: seq_name: The name of the sequence generation method for which to perform this check. Raises an `InvalidLengthException` when a sequence cannot be generated due to mismatch of required params and wanted length. """ min_len_for_method = len(self.__get_params(seq_name)) if min_len_for_method > self.length: raise InvalidLengthException( length=min_len_for_method, message=f"Cannot generate sequence of length {self.length}\ if a method needs a minimum of %s parameters", )
def _SequenceGenerator__check_params(self, given: Dict[str, Any], required: List[str]) ‑> NoneType
-
Checks correctness of supplied parameters to
self.generate_trace
or self.generate_log
.Parameters
given: The given dictionary. required : The required items.
Raises a
MissingRequiredParameter
when something that was required wasn't there.Expand source code
def __check_params(self, given: Dict[str, Any], required: List[str]) -> None: """ Checks correctness of supplied parameters to `self.generate_trace` or `self.generate_log`. Parameters: given: The given dictionary. required : The required items. Raises a ``MissingRequiredParameter`` when something that was required wasn't there. """ missing = [param for param in required if param not in given] if missing: raise MissingRequiredParameter(missing)
def _SequenceGenerator__fib(self, first: int = 1, second: int = 1) ‑> Generator
-
Yield the first
self.length
numbers of the Fibonacci sequence where the first term is first
and the second term is second
.Parameters
first: The first element of the sequence. second: The second element of the sequence.
Returns a generator that generates the sequence.
Expand source code
def __fib(self, first: int = 1, second: int = 1) -> Generator: """ Yield the first `self.length` numbers of the Fibonnaci sequence where the first term is `first` and the second term is `second`. Parameters: first: The first element of the sequence. second: The second element of the sequence. Returns a generator that generates the sequence. """ yield first # If you only want 1 number, for some reason? if self.length == 1: return yield second count = 0 while count < self.length - 2: # Increment count count += 1 # Compute next value current = first + second yield current # Update values first = second second = current
def _SequenceGenerator__fib_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.fib
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __fib_wrapper(self, params: Dict[str, int]) -> Generator: """ Wrapper method for `self.fib`. Written so we can have a unified interface to generate traces, given a sequence key. **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`. """ return self.__fib(first=params["first"], second=params["second"])
def _SequenceGenerator__get_method(self, seq_name: str) ‑> Callable[[Dict[str, int]], Generator]
-
Gets the method reference for a particular sequence generator.
Parameters
seq_name: The name of the sequence generation method for which to retrieve a method reference.
Returns a method reference.
Expand source code
def __get_method(self, seq_name: str) -> Callable[[Dict[str, int]], Generator]: """ Gets the method reference for a particular sequence generator. Parameters: seq_name: The name of the sequence generation method for which to retrieve a method reference. Returns a method reference. """ return self.config[seq_name]["method"]
def _SequenceGenerator__get_params(self, seq_name: str) ‑> List[str]
-
Gets config parameters for a particular sequence generator.
Parameters
seq_name: The name of the sequence generation method for which to retrieve parameters.
Returns a list of parameters.
Expand source code
def __get_params(self, seq_name: str) -> List[str]: """ Gets config parameters for a particul sequence generator. Parameters: seq_name: The name of the sequence generation method for which to retrieve parameters. Returns a list of parameters. """ params = [str(item) for item in self.config[seq_name]["parameters"]] return params
def _SequenceGenerator__long_term_dependency(self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0) ‑> Generator
-
Generalization of Fibonacci sequence with increased dependency, where F(n) = F(n-1) + F(n-5). In other words, the 6th term is equal to the sum of the 5th and the 1st.
Expand source code
def __long_term_dependency( self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0, ) -> Generator: """ Generalization of Fibonacci sequence with increased dependency, where F(n) = F(n-1) + F(n-5). In other words, the 6th term is equal to the sum of the 5th and the 1st. """ count = 0 # Basecases should_check = [first, second, third, fourth, fifth] for num in should_check: if count < self.length: yield num count += 1 else: return # Assume we want the next element in general case # Need to keep points to 5 previous elements n_minus_1 = fifth n_minus_2 = fourth n_minus_3 = third n_minus_4 = second n_minus_5 = first while count < self.length: # Increment count count += 1 # Compute next number and yield it n = n_minus_5 + n_minus_1 yield n # Update values n_minus_1, n_minus_2, n_minus_3, n_minus_4, n_minus_5 = ( n, n_minus_1, n_minus_2, n_minus_3, n_minus_4, )
def _SequenceGenerator__long_term_dependency_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.long_term_dependency
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __long_term_dependency_wrapper(self, params: Dict[str, int]) -> Generator: """ Wrapper method for `self.long_term_dependency`. Written so we can have a unified interface to generate traces, given a sequence key. **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`. """ return self.__long_term_dependency( first=params["first"], second=params["second"], third=params["third"], fourth=params["fourth"], fifth=params["fifth"], )
def _SequenceGenerator__long_term_single_dependency(self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0, constant: int = 1) ‑> Generator
-
F(n) = F(n-5) * c.
Expand source code
def __long_term_single_dependency( self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0, constant: int = 1, ) -> Generator: """ F(n) = F(n-5) * c. """ count = 0 # Basecases should_check = [first, second, third, fourth, fifth] for num in should_check: if count < self.length: yield num count += 1 else: return # Assume we want the next element in general case # Need to keep points to 5 previous elements n_minus_1 = fifth n_minus_2 = fourth n_minus_3 = third n_minus_4 = second n_minus_5 = first while count < self.length: # Increment count count += 1 # Compute next number and yield it n = n_minus_5 * constant yield n # Update values n_minus_1, n_minus_2, n_minus_3, n_minus_4, n_minus_5 = ( n, n_minus_1, n_minus_2, n_minus_3, n_minus_4, )
def _SequenceGenerator__long_term_single_dependency_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.__long_term_single_dependency
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __long_term_single_dependency_wrapper(
    self, params: Dict[str, int]
) -> Generator:
    """
    Adapter that unpacks a parameter dict into a call to
    `self.__long_term_single_dependency`.

    It exists so that every sequence can be generated through the same
    interface, given a sequence key.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.

    Parameters:
        params: Mapping that must contain the keys "first", "second",
            "third", "fourth", "fifth" and "constant". A missing key raises
            `KeyError`.

    Returns the generator produced by `self.__long_term_single_dependency`.
    """
    wanted = ("first", "second", "third", "fourth", "fifth", "constant")
    # Only the listed keys are forwarded; any other keys are ignored.
    arguments = {name: params[name] for name in wanted}
    return self.__long_term_single_dependency(**arguments)
def _SequenceGenerator__pascal(self, first: int = 1) ‑> Generator
-
Yield the first
self.length
numbers of the sequence defined by reading Pascal's triangle from left to right, top to bottom, where the first integer is `first`
(usually this is 1).Parameters
first: The first integer on top of the triangle, and consequently the first integer in the sequence.
Returns a generator that generates the sequence.
Expand source code
def __pascal(self, first: int = 1) -> Generator:
    """
    Yield the first `self.length` numbers obtained by reading Pascal's
    triangle row by row (left to right, top to bottom), with `first` at the
    apex (usually 1).

    Parameters:
        first: The integer on top of the triangle, and consequently the
            first integer in the sequence.

    Returns a generator that generates the sequence.
    """

    def grow(current: List[int]) -> List[int]:
        """
        Derive the row below `current`.

        Parameters:
            current: The row to expand.

        Returns the next row, one element longer than `current`.
        """
        # Pair every entry with its left neighbour (0 for the leftmost one)
        # and close the row with `first`.
        left_neighbours = [0] + current[:-1]
        sums = [left + right for left, right in zip(left_neighbours, current)]
        return sums + [first]

    # The apex is always emitted.
    yield first
    if self.length == 1:
        return

    produced = 1
    row = [first]
    while True:
        row = grow(row)
        for entry in row:
            # Stop in the middle of a row once enough numbers were produced.
            if produced >= self.length:
                return
            yield entry
            produced += 1
def _SequenceGenerator__pascal_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.pascal
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __pascal_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that unpacks a parameter dict into a call to `self.__pascal`.

    It exists so that every sequence can be generated through the same
    interface, given a sequence key.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.

    Parameters:
        params: Mapping that must contain the key "first". A missing key
            raises `KeyError`.

    Returns the generator produced by `self.__pascal`.
    """
    apex = params["first"]
    return self.__pascal(first=apex)
def _SequenceGenerator__range_down(self, last: int = 0, step: int = 1) ‑> Generator
-
Simple range generator that counts down with stepsize
step
such that the last element will be `last`
.Parameters
last: The last element of the sequence. step: The stepsize passed to range
Returns a generator that generates the sequence.
Expand source code
def __range_down(self, last: int = 0, step: int = 1) -> Generator:
    """
    Simple range generator that counts down with stepsize `step` such that
    the final element is exactly `last`.

    Parameters:
        last: The last element of the sequence.
        step: The distance between two consecutive elements.

    Returns a generator that yields exactly `self.length` elements.
    """
    # There are (length - 1) steps between the first and the last of
    # `length` elements, so the first element sits that many steps above
    # `last`. The parentheses matter: without them the sequence only ends on
    # `last` when `step` is 1.
    current = last + step * (self.length - 1)

    produced = 0
    while produced < self.length:
        yield current
        current -= step
        produced += 1
def _SequenceGenerator__range_down_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.range_down
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __range_down_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that unpacks a parameter dict into a call to
    `self.__range_down`.

    It exists so that every sequence can be generated through the same
    interface, given a sequence key.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.

    Parameters:
        params: Mapping that must contain the keys "last" and "step". A
            missing key raises `KeyError`.

    Returns the generator produced by `self.__range_down`.
    """
    final_value = params["last"]
    stride = params["step"]
    return self.__range_down(last=final_value, step=stride)
def _SequenceGenerator__range_up(self, first: int = 0, step: int = 1) ‑> Generator
-
Simple range generator that counts up.
Parameters
first: The first element of the sequence. step: The stepsize passed to range
Returns a generator that generates the sequence.
Expand source code
def __range_up(self, first: int = 0, step: int = 1) -> Generator:
    """
    Simple range generator that counts up from `first` in increments of
    `step`.

    Parameters:
        first: The first element of the sequence.
        step: The distance between two consecutive elements.

    Returns a generator that yields exactly `self.length` elements.
    """
    value = first
    emitted = 0
    while emitted < self.length:
        yield value
        value += step
        emitted += 1
def _SequenceGenerator__range_up_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.range_up
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __range_up_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that unpacks a parameter dict into a call to `self.__range_up`.

    It exists so that every sequence can be generated through the same
    interface, given a sequence key.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.

    Parameters:
        params: Mapping that must contain the keys "first" and "step". A
            missing key raises `KeyError`.

    Returns the generator produced by `self.__range_up`.
    """
    start = params["first"]
    stride = params["step"]
    return self.__range_up(first=start, step=stride)
def _SequenceGenerator__recaman(self, first: int = 0) ‑> Generator
-
Generator for the Recaman's sequence, a well known sequence from the on-line encyclopedia of integer sequences. Available here.
Parameters
first: The first element of the sequence. The original sequence defines this as 0.
Returns a generator that generates the sequence.
Expand source code
def __recaman(self, first: int = 0) -> Generator:
    """
    Generator for Recaman's sequence, a well known sequence from the
    on-line encyclopedia of integer sequences. Available
    [here](https://oeis.org/A005132).

    At index n the sequence steps back by n when the result is non-negative
    and has not been produced before; otherwise it steps forward by n.

    Parameters:
        first: The first element of the sequence. The original sequence
            defines this as 0.

    Returns a generator that yields `self.length` elements.
    """
    # A single requested element is simply the starting value.
    if self.length == 1:
        yield first
        return

    visited = set()
    value = first
    index = 0
    while index < self.length:
        # Prefer a(n) = a(n-1) - n ...
        candidate = value - index
        if candidate < 0 or candidate in visited:
            # ... but fall back to a(n) = a(n-1) + n when stepping back
            # would go negative or repeat an earlier term.
            candidate = value + index

        visited.add(candidate)
        value = candidate
        index += 1
        yield value
def _SequenceGenerator__recaman_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.recaman
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __recaman_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter that unpacks a parameter dict into a call to `self.__recaman`.

    It exists so that every sequence can be generated through the same
    interface, given a sequence key.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.

    Parameters:
        params: Mapping that must contain the key "first". A missing key
            raises `KeyError`.

    Returns the generator produced by `self.__recaman`.
    """
    start = params["first"]
    return self.__recaman(first=start)
def _SequenceGenerator__short_term_single_dependency(self, first: int = 1, constant: int = 2) ‑> Generator
-
A short term dependency. F(n) = F(n-1) * c
Expand source code
def __short_term_single_dependency(
    self, first: int = 1, constant: int = 2
) -> Generator:
    """
    A short term dependency: F(n) = F(n-1) * c.

    Parameters:
        first: The first element of the sequence.
        constant: The multiplier c applied to the previous element.

    Returns a generator that yields `self.length` elements.
    """
    # The starting value is always emitted.
    yield first
    if self.length == 1:
        return

    value = first
    steps = 0
    # One element is already out, so length - 1 multiplications remain.
    while steps < self.length - 1:
        steps += 1
        value = value * constant
        yield value
def _SequenceGenerator__short_term_single_dependency_wrapper(self, params: Dict[str, int]) ‑> Generator
-
Wrapper method for
self.__short_term_single_dependency
. Written so we can have a unified interface to generate traces, given a sequence key.Unsafe when used in any other place than the generation config dict
SequenceGenerator.config
.Expand source code
def __short_term_single_dependency_wrapper(
    self, params: Dict[str, int]
) -> Generator:
    """
    Adapter that unpacks a parameter dict into a call to
    `self.__short_term_single_dependency`.

    It exists so that every sequence can be generated through the same
    interface, given a sequence key.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.

    Parameters:
        params: Mapping that must contain the keys "first" and "constant".
            A missing key raises `KeyError`.

    Returns the generator produced by `self.__short_term_single_dependency`.
    """
    start = params["first"]
    multiplier = params["constant"]
    return self.__short_term_single_dependency(first=start, constant=multiplier)
def generate_log(self, seq_name: str, **kwargs: Any) ‑> List[Tuple[int, ...]]
-
Generates an entire log corresponding to some sequence.
Parameters
seq_name: The name of the sequence generation method for which to generate a log.
Raises a
NotYetImplemented
when theseq_name
key does not correspond to a generator method. Raises aMissingRequiredParameter
when a particular parameter was not provided. Returns a log of traces as a list of tuples.
Expand source code
def generate_log(self, seq_name: str, **kwargs: Any) -> List[Tuple[int, ...]]:
    """
    Generates an entire log corresponding to some sequence.

    The keyword arguments are expected under the plural form of each
    required parameter name (the required name with an "s" appended). One
    trace is generated for every parameter set produced by
    `self.__build_param_matrix`.

    Parameters:
        seq_name: The name of the sequence generation method for which to
            generate a log.

    Raises a `NotYetImplemented` when the `seq_name` key does not correspond
    to a generator method.
    Raises a `MissingRequiredParameter` when a particular parameter was not
    provided.

    Returns a log of traces as a list of tuples.
    """
    # The existence check is done on the trimmed, lower-cased name.
    normalised_name = seq_name.strip().lower()
    try:
        check_item_list(normalised_name, self.get_generators())
    except MissingItem:
        raise NotYetImplemented(seq_name)

    # A log takes a collection of values per parameter, hence the plural.
    required_params = [f"{param}s" for param in self.__get_params(seq_name)]
    self.__check_params(kwargs, required_params)
    self.__check_length_with_params(seq_name)

    # Materialise every trace as a tuple and collect them in a list.
    return [
        tuple(self.generate_trace(seq_name, **params))
        for params in self.__build_param_matrix(kwargs, required_params)
    ]
def generate_trace(self, seq_name: str, **kwargs: Any) ‑> Generator
-
Generates a single trace corresponding to some sequence.
Parameters
seq_name: The name of the sequence generation method for which to generate a trace.
Raises a
NotYetImplemented
when theseq_name
key does not correspond to a generator method. Raises aMissingRequiredParameter
when a particular parameter was not provided.Returns a generator for a particular sequence.
Expand source code
def generate_trace(self, seq_name: str, **kwargs: Any) -> Generator:
    """
    Generates a single trace corresponding to some sequence.

    Parameters:
        seq_name: The name of the sequence generation method for which to
            generate a trace.

    Raises a `NotYetImplemented` when the `seq_name` key does not correspond
    to a generator method.
    Raises a `MissingRequiredParameter` when a particular parameter was not
    provided.

    Returns a generator for a particular sequence.
    """
    # The existence check is done on the trimmed, lower-cased name.
    normalised_name = seq_name.strip().lower()
    try:
        check_item_list(normalised_name, self.get_generators())
    except MissingItem:
        raise NotYetImplemented(seq_name)

    # The sequence exists; make sure all of its parameters were supplied.
    needed = self.__get_params(seq_name)
    self.__check_params(kwargs, needed)

    # Look up the generator wrapper and hand it only the parameters it needs.
    wrapper = self.__get_method(seq_name)
    arguments = self.__build_params(kwargs, needed)
    return wrapper(arguments)
def get_generators(self) ‑> List[str]
-
Gets implemented generator functions.
Returns a list of the names of implemented generator functions.
Expand source code
def get_generators(self) -> List[str]:
    """
    Gets implemented generator functions.

    Returns a new list holding the keys of `self.config`, i.e. the names of
    the implemented generator functions.
    """
    # Iterating a dict yields its keys in insertion order.
    return list(self.config)
class XESTransformator
-
Transforms a process mining log into integer sequences. Can only handle extensions present in
readable_exts
.Attributes
readable_exts – All extensions that this transformator can handle. Initialises the XESTransformator class.
Expand source code
class XESTransformator:
    """
    Transforms a process mining log into integer sequences.
    Can only handle extensions present in `readable_exts`.

    Attributes:
        readable_exts -- All extensions that this transformator can handle.
    """

    # Class Methods
    def __init__(self) -> None:
        """
        Initialises the XESTransformator class.
        """
        self.readable_exts = [".xes", ".xes.gz"]
        """All extensions that this transformator can handle."""

    def __repr__(self) -> str:
        return f"XESTransformator(readable_exts={self.readable_exts})"

    # Helper methods
    def __check_log(self, file: str) -> None:
        """
        Performs some checks on the file and raises exceptions when something
        is wrong.

        In particular:
            Check 0: Check if it is a file.
            Check 1: Check if the file is readable.
            Check 2: Check if the extension is one that we can parse.

        Parameters:
            file -- The file (as string) to check. Can be relative or
                    absolute path.

        Raises `FileNotFoundError` if check 0 fails.
        Raises `PermissionError` if check 1 fails.
        Raises `InvalidLogFormat` if check 2 fails.
        """
        # Check 0: Did we get a file?
        if not isfile(file):
            raise FileNotFoundError(file)

        # Check 1: Can we read the file?
        if not access(file, R_OK):
            raise PermissionError(file)

        # Check 2: Does the path END in a readable extension? A substring
        # test would also accept e.g. "log.xes.txt".
        if not file.endswith(tuple(self.readable_exts)):
            raise InvalidLogFormat(filepath=file)

    def __build_mapping(self, root: ET.Element, file: str) -> Dict[str, int]:
        """
        Builds a mapping from key (XES concept:name) to integer.

        Parameters:
            root -- The root element (`<log>`) of an XES log file.
            file -- Path to the file, used for reporting errors.

        Raises `InvalidElementPassed` if `root` is not a log element.
        Raises `ParsingError` if the log has no
        'meta_concept:named_events_total' element.

        Returns a dictionary mapping a string (key) to an integer.
        """
        # Check element tag
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)

        # Retrieve element by XPath
        dictionary_root_field = root.find(
            ".//*[@key='meta_concept:named_events_total']"
        )
        if dictionary_root_field is None:
            raise ParsingError(
                filepath=file,
                reason="'meta_concept:named_events_total' is not "
                + "present in the log. Does your log adhere to the OpenXES standard?",
            )

        mapping = {}
        # `iter()` yields the field itself first (index 0), so the integers
        # handed out to its descendants start at 1.
        for index, element in enumerate(dictionary_root_field.iter()):
            # Skip the root item itself.
            if element is not dictionary_root_field:
                mapping[element.attrib["key"]] = index

        return mapping

    def __get_all_traces(self, root: ET.Element) -> List[ET.Element]:
        """
        Given the root element (should be log), return a list of all trace
        elements.

        Parameters:
            root -- The root element (`<log>`) of an XES log file.

        Raises `InvalidElementPassed` if `root` is not a log element.

        Returns a list of trace elements (`<trace>`).
        """
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)

        return [elem for elem in root.iter() if "trace" in elem.tag]

    def __get_all_events(self, root: ET.Element) -> List[ET.Element]:
        """
        Given the root element (should be trace), return a list of all event
        elements.

        Parameters:
            root -- Any trace element (`<trace>`) of an XES log file.

        Raises `InvalidElementPassed` if `root` is not a trace element.

        Returns a list of event elements (`<event>`).
        """
        if "trace" not in root.tag:
            raise InvalidElementPassed(expected="trace", element=root.tag)

        return [elem for elem in root.iter() if "event" in elem.tag]

    # Parsing methods
    def __parse_with_ET(self, file: str, gzipped: bool = False) -> ET.ElementTree:
        """
        Parse the XML with ElementTree.
        Distinguishes between gzipped and normal format.

        Parameters:
            file -- The file to parse.
            gzipped -- Boolean indicating whether or not the file is gzipped.
                       False by default.

        Raises `ParsingError` if ElementTree cannot parse the content.

        Returns the parsed element tree.
        """
        try:
            if gzipped:
                with gzip.open(file) as unzipped_file:
                    return ET.parse(unzipped_file)
            return ET.parse(file)
        except ET.ParseError as err:
            # Keep the original parse error attached as the explicit cause.
            raise ParsingError(
                filepath=file, reason="Element Tree ParseError was raised."
            ) from err

    def __parse(self, file: str) -> ET.Element:
        """
        Parses a file (that is either .xes or .xes.gz) with ElementTree and
        returns the root element.

        Parameters:
            file -- The file to parse.

        Returns the root element (`<log>`) of an XES log.
        """
        # Decide on the real suffix: a ".gz" fragment elsewhere in the path
        # (e.g. a directory name) must not trigger gzip decoding.
        tree = self.__parse_with_ET(file, file.endswith(".gz"))
        return tree.getroot()

    # Transforming methods
    def __convert_trace(
        self, trace: ET.Element, mapping: Dict[str, int], file: str
    ) -> Tuple[int, ...]:
        """
        Converts a single trace element into a tuple of integers.

        Parameters:
            trace -- The trace element (`<trace>`) to convert.
            mapping -- The mapping that defines how to convert.
            file -- Path to file, used for logging.

        Raises `InvalidElementPassed` if `trace` is not a trace element.
        Raises `ParsingError` if an event has no 'concept:name' element.

        Returns a tuple of integers representing a trace according to some
        mapping.
        """
        # Check element tag
        if "trace" not in trace.tag:
            raise InvalidElementPassed(expected="trace", element=trace.tag)

        converted = []
        for event in self.__get_all_events(trace):
            key_element = event.find(".//*[@key='concept:name']")
            # Explicit None check -> ensures key_element is an Element.
            if key_element is None:
                # Cannot process this file.
                raise ParsingError(
                    filepath=file,
                    reason="Cannot find key elements while transforming traces.",
                )
            converted.append(mapping[key_element.attrib["value"]])

        return tuple(converted)

    def __make_log(
        self, root: ET.Element, mapping: Dict[str, int], file: str
    ) -> List[Tuple[int, ...]]:
        """
        Makes a log, given a root element and a mapping dictionary.

        Parameters:
            root -- the root element (`<log>`) of an XES log.
            mapping -- a mapping from key (XES concept:name) to integer.
            file -- Path to file, used for logging.

        Raises `InvalidElementPassed` if `root` is not a log element.

        Returns a transformed log.
        """
        # Check element tag
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)

        # Convert every trace, keeping document order.
        return [
            self.__convert_trace(trace, mapping, file)
            for trace in self.__get_all_traces(root)
        ]

    def transform(self, log: str) -> List[Tuple[int, ...]]:
        """
        Transforms a XES log into integer sequences.

        Parameters:
            log -- A logfile to transform to integer sequences.

        Returns a transformed log.
        """
        # Check the log
        self.__check_log(log)

        # Parse the log if possible
        root = self.__parse(log)

        # Build the name mapping
        name_mapping = self.__build_mapping(root, log)

        # Build the log
        return self.__make_log(root, name_mapping, log)
Instance variables
var readable_exts
-
All extensions that this transformator can handle.
Methods
def _XESTransformator__build_mapping(self, root: xml.etree.ElementTree.Element, file: str) ‑> Dict[str, int]
-
Builds a mapping from key (XES concept:name) to integer.
Parameters
root – The root element (
<log>
) of an XES log file. file – Path to the file, used for reporting errors.Returns a dictionary mapping a string (key) to an integer.
Expand source code
def __build_mapping(self, root: ET.Element, file: str) -> Dict[str, int]:
    """
    Builds a mapping from key (XES concept:name) to integer.

    Parameters:
        root -- The root element (`<log>`) of an XES log file.
        file -- Path to the file, used for reporting errors.

    Raises `InvalidElementPassed` if `root` is not a log element.
    Raises `ParsingError` if the log has no
    'meta_concept:named_events_total' element.

    Returns a dictionary mapping a string (key) to an integer.
    """
    # Check element tag
    if "log" not in root.tag:
        raise InvalidElementPassed(expected="log", element=root.tag)

    # Retrieve element by XPath
    dictionary_root_field = root.find(
        ".//*[@key='meta_concept:named_events_total']"
    )
    if dictionary_root_field is None:
        # Note the trailing space before the concatenation; without it the
        # message read "is notpresent".
        raise ParsingError(
            filepath=file,
            reason="'meta_concept:named_events_total' is not "
            + "present in the log. Does your log adhere to the OpenXES standard?",
        )

    mapping = {}
    # `iter()` yields the field itself first (index 0), so the integers
    # handed out to its descendants start at 1.
    for index, element in enumerate(dictionary_root_field.iter()):
        # Skip the root item itself.
        if element is not dictionary_root_field:
            mapping[element.attrib["key"]] = index

    return mapping
def _XESTransformator__check_log(self, file: str) ‑> NoneType
-
Performs some checks on the file and raises exceptions when something is wrong.
In particular: Check 0: Check if it is a file. Check 1: Check if the file is readable. Check 2: Check if the extension is one that we can parse.
Parameters
file – The file (as string) to check. Can be relative or absolute path.
Raises
FileNotFoundError
if check 0 fails. RaisesPermissionError
if check 1 fails. RaisesInvalidLogFormat
if check 2 fails.Expand source code
def __check_log(self, file: str) -> None:
    """
    Performs some checks on the file and raises exceptions when something is
    wrong.

    In particular:
        Check 0: Check if it is a file.
        Check 1: Check if the file is readable.
        Check 2: Check if the extension is one that we can parse.

    Parameters:
        file -- The file (as string) to check. Can be relative or absolute
                path.

    Raises `FileNotFoundError` if check 0 fails.
    Raises `PermissionError` if check 1 fails.
    Raises `InvalidLogFormat` if check 2 fails.
    """
    # Check 0: Did we get a file?
    if not isfile(file):
        raise FileNotFoundError(file)

    # Check 1: Can we read the file?
    if not access(file, R_OK):
        raise PermissionError(file)

    # Check 2: Does the path END in a readable extension? A substring test
    # would also accept e.g. "log.xes.txt".
    if not file.endswith(tuple(self.readable_exts)):
        raise InvalidLogFormat(filepath=file)
def _XESTransformator__convert_trace(self, trace: xml.etree.ElementTree.Element, mapping: Dict[str, int], file: str) ‑> Tuple[int, ...]
-
Converts a single trace element into a tuple of integers.
Parameters
trace – The trace element (
<trace>
) to convert. mapping – The mapping that defines how to convert. file – Path to file, used for logging.Returns a tuple of integers representing a trace according to some mapping.
Expand source code
def __convert_trace(
    self, trace: ET.Element, mapping: Dict[str, int], file: str
) -> Tuple[int, ...]:
    """
    Converts a single trace element into a tuple of integers.

    For every event of the trace, the `value` attribute of its
    'concept:name' element is looked up in `mapping`.

    Parameters:
        trace -- The trace element (`<trace>`) to convert.
        mapping -- The mapping that defines how to convert.
        file -- Path to file, used for logging.

    Raises `InvalidElementPassed` if `trace` is not a trace element.
    Raises `ParsingError` if an event has no 'concept:name' element.

    Returns a tuple of integers representing a trace according to some
    mapping.
    """
    # Guard: only trace elements can be converted.
    if "trace" not in trace.tag:
        raise InvalidElementPassed(expected="trace", element=trace.tag)

    numbers = []
    for event in self.__get_all_events(trace):
        name_node = event.find(".//*[@key='concept:name']")
        if name_node is None:
            # Without a name the event cannot be mapped; give up on the file.
            raise ParsingError(
                filepath=file,
                reason="Cannot find key elements while transforming traces.",
            )
        event_name = name_node.attrib["value"]
        numbers.append(mapping[event_name])

    return tuple(numbers)
def _XESTransformator__get_all_events(self, root: xml.etree.ElementTree.Element) ‑> List[xml.etree.ElementTree.Element]
-
Given the root element (should be trace), return a list of all event elements.
Parameters
root – Any trace element (
<trace>
) of an XES log file.Returns a list of event elements (
<event>
).Expand source code
def __get_all_events(self, root: ET.Element) -> List[ET.Element]:
    """
    Given the root element (should be trace), return a list of all event
    elements.

    Parameters:
        root -- Any trace element (`<trace>`) of an XES log file.

    Raises `InvalidElementPassed` if "trace" does not occur in the tag of
    `root`.

    Returns a list of event elements (`<event>`), in document order.
    """
    if "trace" not in root.tag:
        raise InvalidElementPassed(expected="trace", element=root.tag)

    events = []
    # `iter()` walks the element itself and all of its descendants; an
    # element counts as an event when "event" occurs in its tag.
    for node in root.iter():
        if "event" in node.tag:
            events.append(node)
    return events
def _XESTransformator__get_all_traces(self, root: xml.etree.ElementTree.Element) ‑> List[xml.etree.ElementTree.Element]
-
Given the root element (should be log), return a list of all trace elements.
Parameters
root – The root element (
<log>
) of an XES log file.Returns a list of trace elements (
<trace>
).Expand source code
def __get_all_traces(self, root: ET.Element) -> List[ET.Element]:
    """
    Given the root element (should be log), return a list of all trace
    elements.

    Parameters:
        root -- The root element (`<log>`) of an XES log file.

    Raises `InvalidElementPassed` if "log" does not occur in the tag of
    `root`.

    Returns a list of trace elements (`<trace>`), in document order.
    """
    if "log" not in root.tag:
        raise InvalidElementPassed(expected="log", element=root.tag)

    traces = []
    # `iter()` walks the element itself and all of its descendants; an
    # element counts as a trace when "trace" occurs in its tag.
    for node in root.iter():
        if "trace" in node.tag:
            traces.append(node)
    return traces
def _XESTransformator__make_log(self, root: xml.etree.ElementTree.Element, mapping: Dict[str, int], file: str) ‑> List[Tuple[int, ...]]
-
Makes a log, given a root element and a mapping dictionary.
Parameters
root – the root element (
<log>
) of an XES log. mapping – a mapping from key (XES concept:name) to integer. file – Path to file, used for logging.Returns a transformed log.
Expand source code
def __make_log(
    self, root: ET.Element, mapping: Dict[str, int], file: str
) -> List[Tuple[int, ...]]:
    """
    Makes a log, given a root element and a mapping dictionary.

    Parameters:
        root -- the root element (`<log>`) of an XES log.
        mapping -- a mapping from key (XES concept:name) to integer.
        file -- Path to file, used for logging.

    Raises `InvalidElementPassed` if "log" does not occur in the tag of
    `root`.

    Returns a transformed log: one converted trace per trace element, in the
    order `self.__get_all_traces` returns them.
    """
    # Guard: only log elements can be turned into a log.
    if "log" not in root.tag:
        raise InvalidElementPassed(expected="log", element=root.tag)

    return [
        self.__convert_trace(trace, mapping, file)
        for trace in self.__get_all_traces(root)
    ]
def _XESTransformator__parse(self, file: str) ‑> xml.etree.ElementTree.Element
-
Parses a file (that is either .xes or .xes.gz) with ElementTree and returns the root element.
Parameters
file – The file to parse.
Returns the root element (
<log>
) of an XES log.Expand source code
def __parse(self, file: str) -> ET.Element:
    """
    Parses a file (that is either .xes or .xes.gz) with ElementTree and
    returns the root element.

    Parameters:
        file -- The file to parse.

    Returns the root element (`<log>`) of an XES log.
    """
    # Decide on the real suffix: a ".gz" fragment elsewhere in the path
    # (e.g. a directory called "backup.gz.d") must not trigger gzip decoding
    # of a plain .xes file.
    is_gzipped = file.endswith(".gz")
    tree = self.__parse_with_ET(file, is_gzipped)
    return tree.getroot()
def _XESTransformator__parse_with_ET(self, file: str, gzipped: bool = False) ‑> xml.etree.ElementTree.ElementTree
-
Parse the XML with ElementTree Distinguishes between gzipped and normal format.
Parameters
file – The file to parse. gzipped – Boolean indicating whether or not the file is gzipped. False by default.
Expand source code
def __parse_with_ET(self, file: str, gzipped: bool = False) -> ET.ElementTree:
    """
    Parse the XML with ElementTree.
    Distinguishes between gzipped and normal format.

    Parameters:
        file -- The file to parse.
        gzipped -- Boolean indicating whether or not the file is gzipped.
                   False by default.

    Raises `ParsingError` when ElementTree raises a `ParseError`.

    Returns the parsed element tree.
    """
    try:
        if not gzipped:
            return ET.parse(file)
        # Decompress on the fly; the handle is closed when the block exits.
        with gzip.open(file) as unzipped_file:
            return ET.parse(unzipped_file)
    except ET.ParseError:
        raise ParsingError(
            filepath=file, reason="Element Tree ParseError was raised."
        )
def transform(self, log: str) ‑> List[Tuple[int, ...]]
-
Transforms a XES log into integer sequences.
Parameters
log – A logfile to transform to integer sequences.
Returns a transformed log.
Expand source code
def transform(self, log: str) -> List[Tuple[int, ...]]:
    """
    Transforms a XES log into integer sequences.

    The file is validated first, then parsed, after which the event-name
    mapping is built and used to convert every trace.

    Parameters:
        log -- A logfile to transform to integer sequences.

    Returns a transformed log.
    """
    # Validation raises when the file cannot be handled.
    self.__check_log(log)

    log_root = self.__parse(log)
    event_ids = self.__build_mapping(log_root, log)
    return self.__make_log(log_root, event_ids, log)