Module integer_sequences.generator
Expand source code
from generator.XESTransformator import XESTransformator
from generator.SequenceGenerator import SequenceGenerator
from inspect import getmembers
__all__ = ["SequenceGenerator", "XESTransformator"]

# Tell pdoc to document single-underscore / name-mangled members as well,
# while still leaving dunder members (names that also END in "_") out.
__pdoc__ = {}
for cls in (XESTransformator, SequenceGenerator):
    __pdoc__.update(
        {
            f"{cls.__name__}.{member_name}": True
            for member_name, _member in getmembers(cls)
            if member_name.startswith("_") and not member_name.endswith("_")
        }
    )
Classes
class SequenceGenerator (wanted_length: int = 10)-
Generates a log of traces that originate from the same generating distribution / method.
Attributes
length- The amount of items to generate.
config- A config object that holds, per implemented method, a list of required parameters and a reference to the method.
Initializes the SequenceGenerator class.
Parameters
wanted_length: The desired amount of items to generate. 10 by default.
Expand source code
class SequenceGenerator:
    """
    Generates a log of traces that originate from the same generating distribution / method.

    Attributes:
        length: The amount of items to generate.
        config: A config object that holds, per implemented method, a list of required parameters
                and a reference to the method.
    """

    # Class Methods
    def __init__(self, wanted_length: int = 10):
        """
        Initializes the SequenceGenerator class.

        Parameters:
            wanted_length: The desired amount of items to generate. 10 by default.

        Raises an `InvalidLengthException` when `wanted_length` is not strictly positive.
        """
        # Test for invalid lengths
        if wanted_length <= 0:
            raise InvalidLengthException(wanted_length)

        # At this point we know that length is at least 1.
        self.length: int = wanted_length
        """The amount of items to generate."""

        self.config: Dict[str, Dict[str, Any]] = {
            # fib = short term, multiple dependencies
            "fib": {"parameters": ["first", "second"], "method": self.__fib_wrapper},
            "pascal": {"parameters": ["first"], "method": self.__pascal_wrapper},
            "recaman": {"parameters": ["first"], "method": self.__recaman_wrapper},
            "catalan": {"parameters": ["first"], "method": self.__catalan_wrapper},
            "range_up": {
                "parameters": ["first", "step"],
                "method": self.__range_up_wrapper,
            },
            "range_down": {
                "parameters": ["last", "step"],
                "method": self.__range_down_wrapper,
            },
            # long term, multiple dependency
            "long_term_dependency": {
                "parameters": ["first", "second", "third", "fourth", "fifth"],
                "method": self.__long_term_dependency_wrapper,
            },
            # long term, singular dependency
            "long_term_single_dependency": {
                "parameters": [
                    "first",
                    "second",
                    "third",
                    "fourth",
                    "fifth",
                    "constant",
                ],
                "method": self.__long_term_single_dependency_wrapper,
            },
            # short term, singular dependency
            "short_term_single_dependency": {
                "parameters": ["first", "constant"],
                "method": self.__short_term_single_dependency_wrapper,
            },
        }
        """A config object that holds, per implemented method, a list of required parameters
        and a reference to the method."""

    def __repr__(self) -> str:
        return (
            f"SequenceGenerator(length={self.length}, "
            f"implemented_generators={self.get_generators()})"
        )

    # Generator methods
    def __fib(self, first: int = 1, second: int = 1) -> Generator:
        """
        Yield the first `self.length` numbers of the Fibonacci sequence where the first term is
        `first` and the second term is `second`.

        Parameters:
            first: The first element of the sequence.
            second: The second element of the sequence.

        Returns a generator that generates the sequence.
        """
        yield first
        # A trace of length 1 only contains the first seed.
        if self.length == 1:
            return
        yield second

        for _ in range(self.length - 2):
            first, second = second, first + second
            yield second

    def __fib_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.__fib`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__fib(first=params["first"], second=params["second"])

    def __pascal(self, first: int = 1) -> Generator:
        """
        Yield the first `self.length` numbers of the sequence defined by reading the pascal
        triangle from left to right, top to bottom, where the first integer is `first`
        (usually this is 1).

        Parameters:
            first: The first integer on top of the triangle, and consequently the first integer
                   in the sequence.

        Returns a generator that generates the sequence.
        """
        yield first
        if self.length == 1:
            return

        def next_row(row: List[int]) -> List[int]:
            """
            Computes the next row in the triangle of pascal.

            Parameters:
                row: The current row.

            Returns the next row.
            """
            following = []
            left_neighbour = 0
            for val in row:
                following.append(left_neighbour + val)
                left_neighbour = val
            # Every row is closed with the apex value.
            following.append(first)
            return following

        row = [first]
        # The apex has already been yielded, so start counting at 1.
        counts = 1
        while True:
            # `following_row` (not `next`) so the builtin is not shadowed.
            following_row = next_row(row)
            for item in following_row:
                if counts >= self.length:
                    return
                yield item
                counts += 1
            row = following_row

    def __pascal_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.__pascal`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__pascal(first=params["first"])

    def __recaman(self, first: int = 0) -> Generator:
        """
        Generator for the Recaman's sequence, a well known sequence from the on-line encyclopedia
        of integer sequences. Available [here](https://oeis.org/A005132).

        Parameters:
            first: The first element of the sequence. The original sequence defines this as 0.

        Returns a generator that generates the sequence.
        """
        current = first
        already_seen = set()

        def get_next(current: int, index: int) -> int:
            """
            Computes the next value in the sequence.

            Parameters:
                current: The current value.
                index: The index of the value being computed.

            Returns the new value.
            """
            # a(n) = a(n-1) - n if that is nonnegative and not yet in the sequence,
            # otherwise a(n) = a(n-1) + n.
            new_number = current - index
            if new_number < 0 or new_number in already_seen:
                new_number = current + index
            return new_number

        # Index 0 always reproduces `first` (subtracting or adding 0), so no
        # special case is needed for a trace of length 1.
        for index in range(self.length):
            current = get_next(current, index)
            already_seen.add(current)
            yield current

    def __recaman_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.__recaman`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__recaman(first=params["first"])

    def __catalan(self, first: int = 1) -> Generator:
        """
        Generates the Catalan numbers, where the first integer is parametrised.
        The catalan sequence is available [here](https://oeis.org/A000108).

        Implemented using dynamic programming as the direct formula has issues with `n > 30`.
        In particular, 14544636039226909 became 14544636039226908 and all subsequent values
        were off.

        Parameters:
            first: The first element of the sequence. The original sequence defines this as 1.

        Returns a generator that generates the sequence.
        """
        # Dynamic programming table; the first two entries are both seeded with `first`.
        dp = [0] * (self.length + 1)
        dp[0] = dp[1] = first

        # C(i) = sum_{j < i} C(j) * C(i - j - 1)
        for i in range(2, self.length + 1):
            dp[i] = sum(dp[j] * dp[i - j - 1] for j in range(i))

        # The table holds one entry more than requested; only emit `self.length` of them.
        yield from dp[: self.length]

    def __catalan_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.__catalan`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__catalan(first=params["first"])

    def __range_up(self, first: int = 0, step: int = 1) -> Generator:
        """
        Simple range generator that counts up.

        Parameters:
            first: The first element of the sequence.
            step: The stepsize between consecutive elements.

        Returns a generator that generates the sequence.
        """
        value = first
        for _ in range(self.length):
            yield value
            value += step

    def __range_up_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.__range_up`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__range_up(first=params["first"], step=params["step"])

    def __range_down(self, last: int = 0, step: int = 1) -> Generator:
        """
        Simple range generator that counts down with stepsize `step` such that the last element
        will be `last`.

        Parameters:
            last: The last element of the sequence.
            step: The stepsize between consecutive elements.

        Returns a generator that generates the sequence.
        """
        # `self.length` elements means `self.length - 1` steps above `last`.
        # (The parentheses matter: without them the trace only ends on `last` for step == 1.)
        value = last + step * (self.length - 1)
        for _ in range(self.length):
            yield value
            value -= step

    def __range_down_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.__range_down`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__range_down(last=params["last"], step=params["step"])

    def __long_term_dependency(
        self,
        first: int = 0,
        second: int = 0,
        third: int = 0,
        fourth: int = 0,
        fifth: int = 0,
    ) -> Generator:
        """
        Generalization of Fibonacci sequence with increased dependency,
        where F(n) = F(n-1) + F(n-5).
        In other words, the 6th term is equal to the sum of the 5th and the 1st.

        Parameters:
            first, second, third, fourth, fifth: The five seed values, in order.

        Returns a generator that generates the sequence.
        """
        # Sliding window over the five most recent terms, oldest first.
        window = [first, second, third, fourth, fifth]
        count = 0

        # Base cases: emit the seeds, stopping early for short traces.
        for seed in window:
            if count >= self.length:
                return
            yield seed
            count += 1

        while count < self.length:
            count += 1
            n = window[0] + window[-1]
            yield n
            window = window[1:] + [n]

    def __long_term_dependency_wrapper(self, params: Dict[str, int]) -> Generator:
        """
        Wrapper method for `self.__long_term_dependency`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__long_term_dependency(
            first=params["first"],
            second=params["second"],
            third=params["third"],
            fourth=params["fourth"],
            fifth=params["fifth"],
        )

    def __long_term_single_dependency(
        self,
        first: int = 0,
        second: int = 0,
        third: int = 0,
        fourth: int = 0,
        fifth: int = 0,
        constant: int = 1,
    ) -> Generator:
        """
        A long term, single dependency: F(n) = F(n-5) * c.

        Parameters:
            first, second, third, fourth, fifth: The five seed values, in order.
            constant: The multiplier `c`.

        Returns a generator that generates the sequence.
        """
        # Sliding window over the five most recent terms, oldest first.
        window = [first, second, third, fourth, fifth]
        count = 0

        # Base cases: emit the seeds, stopping early for short traces.
        for seed in window:
            if count >= self.length:
                return
            yield seed
            count += 1

        while count < self.length:
            count += 1
            n = window[0] * constant
            yield n
            window = window[1:] + [n]

    def __long_term_single_dependency_wrapper(
        self, params: Dict[str, int]
    ) -> Generator:
        """
        Wrapper method for `self.__long_term_single_dependency`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__long_term_single_dependency(
            first=params["first"],
            second=params["second"],
            third=params["third"],
            fourth=params["fourth"],
            fifth=params["fifth"],
            constant=params["constant"],
        )

    def __short_term_single_dependency(
        self, first: int = 1, constant: int = 2
    ) -> Generator:
        """
        A short term dependency: F(n) = F(n-1) * c.

        Parameters:
            first: The first element of the sequence.
            constant: The multiplier `c`.

        Returns a generator that generates the sequence.
        """
        yield first
        current = first
        for _ in range(self.length - 1):
            current = current * constant
            yield current

    def __short_term_single_dependency_wrapper(
        self, params: Dict[str, int]
    ) -> Generator:
        """
        Wrapper method for `self.__short_term_single_dependency`.
        Written so we can have a unified interface to generate traces, given a sequence key.

        **Unsafe** when used in any other place than the generation config dict
        `SequenceGenerator.config`.
        """
        return self.__short_term_single_dependency(
            first=params["first"], constant=params["constant"]
        )

    # Private getters
    def __get_params(self, seq_name: str) -> List[str]:
        """
        Gets config parameters for a particular sequence generator.

        Parameters:
            seq_name: The name of the sequence generation method for which to retrieve parameters.

        Returns a list of parameters.
        """
        return [str(item) for item in self.config[seq_name]["parameters"]]

    def __get_method(self, seq_name: str) -> Callable[[Dict[str, int]], Generator]:
        """
        Gets the method reference for a particular sequence generator.

        Parameters:
            seq_name: The name of the sequence generation method for which to retrieve a method
                      reference.

        Returns a method reference.
        """
        return self.config[seq_name]["method"]

    # Helper methods
    def __check_params(self, given: Dict[str, Any], required: List[str]) -> None:
        """
        Checks correctness of supplied parameters to `self.generate_trace` or `self.generate_log`.

        Parameters:
            given: The given dictionary.
            required: The required items.

        Raises a `MissingRequiredParameter` when something that was required wasn't there.
        """
        missing = [param for param in required if param not in given]
        if missing:
            raise MissingRequiredParameter(missing)

    def __build_params(
        self, given: Dict[str, Any], required: List[Any]
    ) -> Dict[str, Any]:
        """
        Builds a keyword-argument dictionary given the parameters in `self.generate_trace` or
        `self.generate_log`. Entries of `given` that are not required are dropped.

        Parameters:
            given: The given dictionary.
            required: The required items.

        Returns a dictionary of the form:
        ```
        {
            "param1" : value,
            "param2" : value,
            ...
        }
        ```
        """
        return {
            required_parameter: given[required_parameter]
            for required_parameter in required
        }

    def __build_param_matrix(
        self, givens: Dict[str, Any], requireds: List[str]
    ) -> List[Dict[str, int]]:
        """
        Builds parameter list for usage in `self.generate_log`.

        Takes the required (plural, e.g. `"firsts"`) entries of `givens`, each holding a list of
        candidate values, and returns one dictionary per element of their cartesian product,
        keyed by the singular name (the plural name minus its last character).

        Parameters:
            givens: The given dictionary, mapping plural parameter names to lists of values.
            requireds: The required (plural) parameter names.

        Returns a list of dictionaries, one per combination of values.
        """
        array_dict = self.__build_params(givens, requireds)
        # Strip the trailing "s" once; the key list is the same for every combination.
        keys = [key[:-1] for key in array_dict]
        return [
            dict(zip(keys, tuple_of_vals))
            for tuple_of_vals in itertools.product(*array_dict.values())
        ]

    def __check_length_with_params(self, seq_name: str) -> None:
        """
        Checks whether or not we can mathematically generate a trace of length `self.length`
        given a particular generator, identified by `seq_name`.

        Parameters:
            seq_name: The name of the sequence generation method for which to perform this check.

        Raises an `InvalidLengthException` when a sequence cannot be generated due to mismatch
        of required params and wanted length.
        """
        min_len_for_method = len(self.__get_params(seq_name))
        if min_len_for_method > self.length:
            # NOTE(review): the "%s" placeholder is left unformatted here; presumably
            # InvalidLengthException fills it in with `length` -- confirm.
            raise InvalidLengthException(
                length=min_len_for_method,
                message=(
                    f"Cannot generate sequence of length {self.length} "
                    "if a method needs a minimum of %s parameters"
                ),
            )

    # Public methods
    def get_generators(self) -> List[str]:
        """
        Gets implemented generator functions.

        Returns a list of the names of implemented generator functions.
        """
        return list(self.config)

    def generate_trace(self, seq_name: str, **kwargs: Any) -> Generator:
        """
        Generates a single trace corresponding to some sequence.

        Parameters:
            seq_name: The name of the sequence generation method for which to generate a trace.
                      Surrounding whitespace and letter case are ignored.

        Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator
        method.
        Raises a `MissingRequiredParameter` when a particular parameter was not provided.

        Returns a generator for a particular sequence.
        """
        # Normalise once and use the same key for the existence check AND the lookups.
        key = seq_name.strip().lower()
        try:
            check_item_list(key, self.get_generators())
        except MissingItem:
            raise NotYetImplemented(seq_name)

        # It exists, check for param mismatch
        required_params = self.__get_params(key)
        self.__check_params(kwargs, required_params)

        # Required (and possibly more) params present -- keep only the required ones
        method = self.__get_method(key)
        method_params = self.__build_params(kwargs, required_params)
        return method(method_params)

    def generate_log(self, seq_name: str, **kwargs: Any) -> List[Tuple[int, ...]]:
        """
        Generates an entire log corresponding to some sequence.

        Parameters:
            seq_name: The name of the sequence generation method for which to generate a log.
                      Surrounding whitespace and letter case are ignored.

        Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator
        method.
        Raises a `MissingRequiredParameter` when a particular parameter was not provided.

        Returns a log of traces as a list of tuples.
        """
        key = seq_name.strip().lower()
        try:
            check_item_list(key, self.get_generators())
        except MissingItem:
            raise NotYetImplemented(seq_name)

        # The log API takes the plural form of every trace parameter ("first" -> "firsts").
        required_params = [param + "s" for param in self.__get_params(key)]
        self.__check_params(kwargs, required_params)
        self.__check_length_with_params(key)

        # One trace per combination of the supplied parameter values
        return [
            tuple(self.generate_trace(key, **params))
            for params in self.__build_param_matrix(kwargs, required_params)
        ]
# Instance variables
var config-
A config object that holds, per implemented method, a list of required parameters and a reference to the method.
var length-
The amount of items to generate.
Methods
def _SequenceGenerator__build_param_matrix(self, givens: Dict[str, Any], requireds: List[str]) ‑> List[Dict[str, int]]-
Builds parameter list for usage in `self.generate_log`.
Transforms a dictionary of shape:
{ "required_param1" : [1, 2, ..], "required_param2" : [1, 2, 3, 4, ..], "required_param3" : [1, ..], .. }
to our wanted list of shape:
[ { "required_param1" : 1, "required_param2" : 1, "required_param3" : 1 }, ... { "required_param1" : 3, "required_param2" : 3, "required_param3" : 3 } ]
Parameters
givens: The given dictionary. requireds: The required items.
Returns a list of dictionaries as listed above.
Expand source code
def __build_param_matrix( self, givens: Dict[str, Any], requireds: List[str] ) -> List[Dict[str, int]]: """ Builds parameter list for usage in `self.generate_log`. Transforms a dictionary of shape: ``` { "required_param1" : [1, 2, ..], "required_param2" : [1, 2, 3, 4, ..], "required_param3" : [1, ..], .. } ``` to our wanted list of shape: ``` [ { "required_param1" : 1, "required_param2" : 1, "required_param3" : 1 }, ... { "required_param1" : 3, "required_param2" : 3, "required_param3" : 3 } ] ``` Parameters: given: The given dictionary. required : The required items. Returns a list of dictionaries as listed above. """ # Keep result variable result = [] # Create dictionary out of lists array_dict = self.__build_params(givens, requireds) # Compute the cartesian product using itertools compute_product_of_me = list(array_dict.values()) for tuple_of_vals in itertools.product(*compute_product_of_me): # tuple_of_vals: (a, b, c, ...) # len(item) == len(keys) # item[0] corresponds with keys[0] keys = [key[:-1] for key in array_dict.keys()] result.append({key: value for (key, value) in zip(keys, tuple_of_vals)}) return result def _SequenceGenerator__build_params(self, given: Dict[str, Any], required: List[Any]) ‑> Dict[str, Any]-
Builds a keyword-argument dictionary given the parameters in `self.generate_trace` or `self.generate_log`.
Parameters
given: The given dictionary. required: The required items.
Returns a dictionary of the form:
{ "param1" : value, "param2" : value, ... }
Expand source code
def __build_params( self, given: Dict[str, Any], required: List[Any] ) -> Dict[str, Any]: """ Builds a keyword-argument dictionary given the parameters in `self.generate_trace` or `self.generate_log`. Parameters: given: The given dictionary. required : The required items. Returns a dictionary of the form: ``` { "param1" : value, "param2" : value, ... } ``` """ return { required_parameter: given[required_parameter] for required_parameter in required } def _SequenceGenerator__catalan(self, first: int = 1) ‑> Generator-
Generates the Catalan numbers, where the first integer is parametrised. The catalan sequence is available here.
Implemented using dynamic programming as the direct formula has issues with `n > 30`. In particular, 14544636039226909 became 14544636039226908 and all subsequent values were off.
Parameters
first: The first element of the sequence. The original sequence defines this as 1.
Returns a generator that generates the sequence.
Expand source code
def __catalan(self, first: int = 1) -> Generator: """ Generates the Catalan numbers, where the first integer is parametrised. The catalan sequence is available [here](https://oeis.org/A000108). Implemented using dynamic programming as the direct formula has issues with `n > 30` In particular, 14544636039226909 became 14544636039226908 and all subsequent values were off. Parameters: first: The first element of the sequence. The original sequence defines this as 1. Returns a generator that generates the sequence. """ # Initialise dynamic programming table dp = [0] * (self.length + 1) dp[0] = first # in our case this is a parameter. By default it should be 1. dp[1] = first # in our case this is a parameter. By default it should be 1. # Fill the dp entries based on the recursive formula for i in range(2, self.length + 1): for j in range(i): dp[i] += dp[j] * dp[i - j - 1] # Loop through table, and yield the next item as long as the index does not exceed the length! for index in range(len(dp)): if index < self.length: yield dp[index] def _SequenceGenerator__catalan_wrapper(self, params: Dict[str, int]) ‑> Generator-
Wrapper method for `self.__catalan`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __catalan_wrapper(self, params: Dict[str, int]) -> Generator: """ Wrapper method for `self.recaman`. Written so we can have a unified interface to generate traces, given a sequence key. **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`. """ return self.__catalan(first=params["first"]) def _SequenceGenerator__check_length_with_params(self, seq_name: str) ‑> NoneType-
Checks whether or not we can mathematically generate a trace of length `self.length` given a particular generator, identified by `seq_name`.
Parameters
seq_name: The name of the sequence generation method for which to perform this check.
Raises an `InvalidLengthException` when a sequence cannot be generated due to a mismatch between the required params and the wanted length.
Expand source code
def __check_length_with_params(self, seq_name: str) -> None: """ Checks whether or not we can mathematically generate a trace of length `self.length` given a particular generator, identified by `seq_name`. Parameters: seq_name: The name of the sequence generation method for which to perform this check. Raises an `InvalidLengthException` when a sequence cannot be generated due to mismatch of required params and wanted length. """ min_len_for_method = len(self.__get_params(seq_name)) if min_len_for_method > self.length: raise InvalidLengthException( length=min_len_for_method, message=f"Cannot generate sequence of length {self.length}\ if a method needs a minimum of %s parameters", ) def _SequenceGenerator__check_params(self, given: Dict[str, Any], required: List[str]) ‑> NoneType-
Checks correctness of supplied parameters to `self.generate_trace` or `self.generate_log`.
Parameters
given: The given dictionary. required: The required items.
Raises a `MissingRequiredParameter` when something that was required wasn't there.
Expand source code
def __check_params(self, given: Dict[str, Any], required: List[str]) -> None: """ Checks correctness of supplied parameters to `self.generate_trace` or `self.generate_log`. Parameters: given: The given dictionary. required : The required items. Raises a ``MissingRequiredParameter`` when something that was required wasn't there. """ missing = [param for param in required if param not in given] if missing: raise MissingRequiredParameter(missing) def _SequenceGenerator__fib(self, first: int = 1, second: int = 1) ‑> Generator-
Yield the first `self.length` numbers of the Fibonacci sequence where the first term is `first` and the second term is `second`.
Parameters
first: The first element of the sequence. second: The second element of the sequence.
Returns a generator that generates the sequence.
Expand source code
def __fib(self, first: int = 1, second: int = 1) -> Generator: """ Yield the first `self.length` numbers of the Fibonnaci sequence where the first term is `first` and the second term is `second`. Parameters: first: The first element of the sequence. second: The second element of the sequence. Returns a generator that generates the sequence. """ yield first # If you only want 1 number, for some reason? if self.length == 1: return yield second count = 0 while count < self.length - 2: # Increment count count += 1 # Compute next value current = first + second yield current # Update values first = second second = current def _SequenceGenerator__fib_wrapper(self, params: Dict[str, int]) ‑> Generator-
Wrapper method for `self.__fib`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __fib_wrapper(self, params: Dict[str, int]) -> Generator: """ Wrapper method for `self.fib`. Written so we can have a unified interface to generate traces, given a sequence key. **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`. """ return self.__fib(first=params["first"], second=params["second"]) def _SequenceGenerator__get_method(self, seq_name: str) ‑> Callable[[Dict[str, int]], Generator]-
Gets the method reference for a particular sequence generator.
Parameters
seq_name: The name of the sequence generation method for which to retrieve a method reference.
Returns a method reference.
Expand source code
def __get_method(self, seq_name: str) -> Callable[[Dict[str, int]], Generator]: """ Gets the method reference for a particular sequence generator. Parameters: seq_name: The name of the sequence generation method for which to retrieve a method reference. Returns a method reference. """ return self.config[seq_name]["method"] def _SequenceGenerator__get_params(self, seq_name: str) ‑> List[str]-
Gets config parameters for a particular sequence generator.
Parameters
seq_name: The name of the sequence generation method for which to retrieve parameters.
Returns a list of parameters.
Expand source code
def __get_params(self, seq_name: str) -> List[str]: """ Gets config parameters for a particul sequence generator. Parameters: seq_name: The name of the sequence generation method for which to retrieve parameters. Returns a list of parameters. """ params = [str(item) for item in self.config[seq_name]["parameters"]] return params def _SequenceGenerator__long_term_dependency(self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0) ‑> Generator-
Generalization of Fibonacci sequence with increased dependency, where F(n) = F(n-1) + F(n-5). In other words, the 6th term is equal to the sum of the 5th and the 1st.
Expand source code
def __long_term_dependency( self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0, ) -> Generator: """ Generalization of Fibonacci sequence with increased dependency, where F(n) = F(n-1) + F(n-5). In other words, the 6th term is equal to the sum of the 5th and the 1st. """ count = 0 # Basecases should_check = [first, second, third, fourth, fifth] for num in should_check: if count < self.length: yield num count += 1 else: return # Assume we want the next element in general case # Need to keep points to 5 previous elements n_minus_1 = fifth n_minus_2 = fourth n_minus_3 = third n_minus_4 = second n_minus_5 = first while count < self.length: # Increment count count += 1 # Compute next number and yield it n = n_minus_5 + n_minus_1 yield n # Update values n_minus_1, n_minus_2, n_minus_3, n_minus_4, n_minus_5 = ( n, n_minus_1, n_minus_2, n_minus_3, n_minus_4, ) def _SequenceGenerator__long_term_dependency_wrapper(self, params: Dict[str, int]) ‑> Generator-
Wrapper method for `self.__long_term_dependency`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __long_term_dependency_wrapper(self, params: Dict[str, int]) -> Generator: """ Wrapper method for `self.long_term_dependency`. Written so we can have a unified interface to generate traces, given a sequence key. **Unsafe** when used in any other place than the generation config dict `SequenceGenerator.config`. """ return self.__long_term_dependency( first=params["first"], second=params["second"], third=params["third"], fourth=params["fourth"], fifth=params["fifth"], ) def _SequenceGenerator__long_term_single_dependency(self, first: int = 0, second: int = 0, third: int = 0, fourth: int = 0, fifth: int = 0, constant: int = 1) ‑> Generator-
F(n) = F(n-5) * c.
Expand source code
def __long_term_single_dependency(
    self,
    first: int = 0,
    second: int = 0,
    third: int = 0,
    fourth: int = 0,
    fifth: int = 0,
    constant: int = 1,
) -> Generator:
    """
    Sequence with one long-range dependency: F(n) = F(n-5) * c.

    The five positional seeds are emitted first, in order. Every later term
    is the term five positions back multiplied by `constant`. Generation
    stops after `self.length` terms.

    Returns a generator that generates the sequence.
    """
    # The five most recent terms, oldest first.
    window = [first, second, third, fourth, fifth]
    produced = 0

    # Base cases: hand out the seeds, cut short for lengths below five.
    for seed in window:
        if produced >= self.length:
            return
        yield seed
        produced += 1

    # General case: scale the oldest term, then slide the window by one.
    while produced < self.length:
        produced += 1
        term = window[0] * constant
        yield term
        window = window[1:] + [term]
# Next documented member: def _SequenceGenerator__long_term_single_dependency_wrapper(self, params: Dict[str, int]) -> Generator
Wrapper method for `self.__long_term_single_dependency`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __long_term_single_dependency_wrapper(
    self, params: Dict[str, int]
) -> Generator:
    """
    Adapter between the generation config and
    `self.__long_term_single_dependency`.

    Looks up the five seed values and the multiplier in `params` by name and
    forwards them as keyword arguments.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.
    """
    wanted = ("first", "second", "third", "fourth", "fifth", "constant")
    arguments = {name: params[name] for name in wanted}
    return self.__long_term_single_dependency(**arguments)
# Next documented member: def _SequenceGenerator__pascal(self, first: int = 1) -> Generator
Yield the first `self.length` numbers of the sequence defined by reading Pascal's triangle from left to right, top to bottom, where the first integer is `first` (usually this is 1).
Parameters
first: The first integer on top of the triangle, and consequently the first integer in the sequence.
Returns a generator that generates the sequence.
Expand source code
def __pascal(self, first: int = 1) -> Generator:
    """
    Yield the first `self.length` numbers obtained by reading Pascal's
    triangle row by row, left to right, with `first` at the top.

    Parameters:
        first: The integer at the top of the triangle, and therefore the
               first integer of the sequence (usually 1).

    Returns a generator that generates the sequence.
    """
    yield first
    # A single requested number is just the top of the triangle.
    if self.length == 1:
        return

    def next_row(row: List[int]) -> List[int]:
        """
        Builds the row below `row`: each entry is the sum of the two entries
        above it (the left edge has 0 to its upper left), and the right edge
        is always `first`.
        """
        shifted = [0] + row[:-1]
        return [left + right for left, right in zip(shifted, row)] + [first]

    current = [first]
    # The top of the triangle has already been emitted.
    emitted = 1
    while True:
        current = next_row(current)
        for entry in current:
            if emitted >= self.length:
                return
            yield entry
            emitted += 1
# Next documented member: def _SequenceGenerator__pascal_wrapper(self, params: Dict[str, int]) -> Generator
Wrapper method for `self.__pascal`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __pascal_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter between the generation config and `self.__pascal`.

    Reads the `"first"` entry of `params` and passes it on as the top of the
    triangle.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.
    """
    top = params["first"]
    return self.__pascal(first=top)
# Next documented member: def _SequenceGenerator__range_down(self, last: int = 0, step: int = 1) -> Generator
Simple range generator that counts down with step size `step` such that the last element will be `last`.
Parameters
last: The last element of the sequence.
step: The step size between consecutive elements.
Returns a generator that generates the sequence.
Expand source code
def __range_down(self, last: int = 0, step: int = 1) -> Generator:
    """
    Simple range generator that counts down with stepsize `step` such that
    the last element will be `last`.

    Parameters:
        last: The last element of the sequence.
        step: The distance between two consecutive elements.

    Returns a generator that generates `self.length` elements.
    """
    # The sequence takes (length - 1) steps to reach `last`, so it has to
    # start that many steps above it. The previous expression,
    # `last + step * self.length - 1`, only ended on `last` for step == 1.
    value = last + step * (self.length - 1)

    produced = 0
    while produced < self.length:
        yield value
        value -= step
        produced += 1
# Next documented member: def _SequenceGenerator__range_down_wrapper(self, params: Dict[str, int]) -> Generator
Wrapper method for `self.__range_down`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __range_down_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter between the generation config and `self.__range_down`.

    Reads the `"last"` and `"step"` entries of `params` and forwards them as
    keyword arguments.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.
    """
    final_value, stride = params["last"], params["step"]
    return self.__range_down(last=final_value, step=stride)
# Next documented member: def _SequenceGenerator__range_up(self, first: int = 0, step: int = 1) -> Generator
Simple range generator that counts up.
Parameters
first: The first element of the sequence. step: The stepsize passed to range
Returns a generator that generates the sequence.
Expand source code
def __range_up(self, first: int = 0, step: int = 1) -> Generator:
    """
    Simple range generator that counts up.

    Parameters:
        first: The first element of the sequence.
        step: The distance between two consecutive elements.

    Returns a generator that generates `self.length` elements.
    """
    value = first
    produced = 0
    while produced < self.length:
        yield value
        value += step
        produced += 1
# Next documented member: def _SequenceGenerator__range_up_wrapper(self, params: Dict[str, int]) -> Generator
Wrapper method for `self.__range_up`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __range_up_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter between the generation config and `self.__range_up`.

    Reads the `"first"` and `"step"` entries of `params` and forwards them
    as keyword arguments.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.
    """
    start, stride = params["first"], params["step"]
    return self.__range_up(first=start, step=stride)
# Next documented member: def _SequenceGenerator__recaman(self, first: int = 0) -> Generator
Generator for the Recaman's sequence, a well known sequence from the on-line encyclopedia of integer sequences. Available here.
Parameters
first: The first element of the sequence. The original sequence defines this as 0.
Returns a generator that generates the sequence.
Expand source code
def __recaman(self, first: int = 0) -> Generator:
    """
    Generator for Recaman's sequence, a well known sequence from the on-line
    encyclopedia of integer sequences.
    Available [here](https://oeis.org/A005132).

    Parameters:
        first: The first element of the sequence. The original sequence
               defines this as 0.

    Returns a generator that generates the sequence.
    """
    # A single requested number is just the starting value.
    if self.length == 1:
        yield first
        return

    seen = set()
    value = first
    index = 0
    while index < self.length:
        # Step back by `index` when that lands on a fresh, non-negative
        # number; otherwise step forward by `index` instead.
        candidate = value - index
        if candidate < 0 or candidate in seen:
            candidate = value + index
        seen.add(candidate)
        value = candidate
        index += 1
        yield value
# Next documented member: def _SequenceGenerator__recaman_wrapper(self, params: Dict[str, int]) -> Generator
Wrapper method for `self.__recaman`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __recaman_wrapper(self, params: Dict[str, int]) -> Generator:
    """
    Adapter between the generation config and `self.__recaman`.

    Reads the `"first"` entry of `params` and passes it on as the starting
    value.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.
    """
    start = params["first"]
    return self.__recaman(first=start)
# Next documented member: def _SequenceGenerator__short_term_single_dependency(self, first: int = 1, constant: int = 2) -> Generator
A short term dependency. F(n) = F(n-1) * c
Expand source code
def __short_term_single_dependency(
    self, first: int = 1, constant: int = 2
) -> Generator:
    """
    A short term dependency: F(n) = F(n-1) * c.

    `first` is emitted as the opening term; each later term is the previous
    one multiplied by `constant`. Generation stops after `self.length`
    terms.

    Returns a generator that generates the sequence.
    """
    value = first
    yield value
    # A single requested number is just the starting value.
    if self.length == 1:
        return

    # One term is already out; keep multiplying until the length is reached.
    produced = 1
    while produced < self.length:
        value = value * constant
        yield value
        produced += 1
# Next documented member: def _SequenceGenerator__short_term_single_dependency_wrapper(self, params: Dict[str, int]) -> Generator
Wrapper method for `self.__short_term_single_dependency`. Written so we can have a unified interface to generate traces, given a sequence key.
Unsafe when used in any other place than the generation config dict `SequenceGenerator.config`.
Expand source code
def __short_term_single_dependency_wrapper(
    self, params: Dict[str, int]
) -> Generator:
    """
    Adapter between the generation config and
    `self.__short_term_single_dependency`.

    Reads the `"first"` and `"constant"` entries of `params` and forwards
    them as keyword arguments.

    **Unsafe** when used in any other place than the generation config dict
    `SequenceGenerator.config`.
    """
    start, factor = params["first"], params["constant"]
    return self.__short_term_single_dependency(first=start, constant=factor)
# Next documented member: def generate_log(self, seq_name: str, **kwargs: Any) -> List[Tuple[int, ...]]
Generates an entire log corresponding to some sequence.
Parameters
seq_name: The name of the sequence generation method for which to generate a log.
Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator method. Raises a `MissingRequiredParameter` when a particular parameter was not provided.
Returns a log of traces as a list of tuples.
Expand source code
def generate_log(self, seq_name: str, **kwargs: Any) -> List[Tuple[int, ...]]:
    """
    Generates an entire log corresponding to some sequence.

    Parameters:
        seq_name: The name of the sequence generation method for which to
                  generate a log.
        kwargs: Values for the generator's parameters, looked up under the
                pluralised parameter names (each required name with an "s"
                appended, e.g. `first` becomes `firsts`).

    Raises a `NotYetImplemented` when the `seq_name` key does not correspond
    to a generator method.
    Raises a `MissingRequiredParameter` when a particular parameter was not
    provided.

    Returns a log of traces as a list of tuples.
    """
    # Only the membership test uses the normalised name; every later lookup
    # receives `seq_name` exactly as the caller passed it.
    normalised = seq_name.strip().lower()
    try:
        check_item_list(normalised, self.get_generators())
    except MissingItem:
        raise NotYetImplemented(seq_name)

    plural_names = [name + "s" for name in self.__get_params(seq_name)]
    self.__check_params(kwargs, plural_names)
    self.__check_length_with_params(seq_name)

    # One trace per parameter combination produced by the param matrix.
    return [
        tuple(self.generate_trace(seq_name, **combination))
        for combination in self.__build_param_matrix(kwargs, plural_names)
    ]
# Next documented member: def generate_trace(self, seq_name: str, **kwargs: Any) -> Generator
Generates a single trace corresponding to some sequence.
Parameters
seq_name: The name of the sequence generation method for which to generate a trace.
Raises a `NotYetImplemented` when the `seq_name` key does not correspond to a generator method. Raises a `MissingRequiredParameter` when a particular parameter was not provided.
Returns a generator for a particular sequence.
Expand source code
def generate_trace(self, seq_name: str, **kwargs: Any) -> Generator:
    """
    Generates a single trace corresponding to some sequence.

    Parameters:
        seq_name: The name of the sequence generation method for which to
                  generate a trace.
        kwargs: Values for the parameters that the chosen generator
                requires.

    Raises a `NotYetImplemented` when the `seq_name` key does not correspond
    to a generator method.
    Raises a `MissingRequiredParameter` when a particular parameter was not
    provided.

    Returns a generator for a particular sequence.
    """
    # Only the membership test uses the normalised name; every later lookup
    # receives `seq_name` exactly as the caller passed it.
    normalised = seq_name.strip().lower()
    try:
        check_item_list(normalised, self.get_generators())
    except MissingItem:
        raise NotYetImplemented(seq_name)

    # The generator exists: make sure its required parameters were supplied.
    needed = self.__get_params(seq_name)
    self.__check_params(kwargs, needed)

    # Fetch the wrapper, assemble its parameter dict and start the generator.
    wrapper = self.__get_method(seq_name)
    return wrapper(self.__build_params(kwargs, needed))
# Next documented member: def get_generators(self) -> List[str]
Gets implemented generator functions.
Returns a list of the names of implemented generator functions.
Expand source code
def get_generators(self) -> List[str]:
    """
    Gets implemented generator functions.

    Returns a list with the keys of `self.config`, i.e. the names of the
    implemented generator functions.
    """
    names = list(self.config.keys())
    return names
class XESTransformator-
Transforms a process mining log into integer sequences. Can only handle extensions present in `readable_exts`.
Attributes
readable_exts – All extensions that this transformator can handle.
Initialises the XESTransformator class.
Expand source code
class XESTransformator:
    """
    Transforms a process mining log into integer sequences.
    Can only handle extensions present in `readable_exts`.

    Attributes:
        readable_exts -- All extensions that this transformator can handle.
    """

    # Class Methods
    def __init__(self) -> None:
        """
        Initialises the XESTransformator class.
        """
        self.readable_exts = [".xes", ".xes.gz"]
        """All extensions that this transformator can handle."""

    def __repr__(self) -> str:
        return f"XESTransformator(readable_exts={self.readable_exts})"

    # Helper methods
    def __check_log(self, file: str) -> None:
        """
        Performs some checks on the file and raises exceptions when something
        is wrong.

        In particular:
            Check 0: Check if it is a file.
            Check 1: Check if the file is readable.
            Check 2: Check if the extension is one that we can parse.

        Parameters:
            file -- The file (as string) to check. Can be relative or
                    absolute path.

        Raises `FileNotFoundError` if check 0 fails.
        Raises `PermissionError` if check 1 fails.
        Raises `InvalidLogFormat` if check 2 fails.
        """
        # Check 0: Did we get a file?
        if not isfile(file):
            raise FileNotFoundError(file)

        # Check 1: Can we read the file?
        if not access(file, R_OK):
            raise PermissionError(file)

        # Check 2: Does the path END in a readable extension? A substring
        # test would also accept names such as "log.xes.bak".
        if not file.endswith(tuple(self.readable_exts)):
            raise InvalidLogFormat(filepath=file)

    def __build_mapping(self, root: ET.Element, file: str) -> Dict[str, int]:
        """
        Builds a mapping from key (XES concept:name) to integer.

        Parameters:
            root -- The root element (`<log>`) of an XES log file.
            file -- Path to the file, used for reporting errors.

        Raises `InvalidElementPassed` when `root` is not a log element.
        Raises `ParsingError` when the log has no
        'meta_concept:named_events_total' element.

        Returns a dictionary mapping a string (key) to an integer.
        """
        # Check element tag
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)

        # Retrieve element by XPath
        dictionary_root_field = root.find(
            ".//*[@key='meta_concept:named_events_total']"
        )
        if dictionary_root_field is None:
            raise ParsingError(
                filepath=file,
                reason="'meta_concept:named_events_total' is not "
                "present in the log. Does your log adhere to the OpenXES standard?",
            )

        # iter() yields the field itself first (index 0), so the keys below
        # it are numbered from 1 in document order.
        return {
            element.attrib["key"]: index
            for index, element in enumerate(dictionary_root_field.iter())
            if element is not dictionary_root_field
        }

    def __get_all_traces(self, root: ET.Element) -> List[ET.Element]:
        """
        Given the root element (should be log), return a list of all trace
        elements.

        Parameters:
            root -- The root element (`<log>`) of an XES log file.

        Returns a list of trace elements (`<trace>`).
        """
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)
        return [elem for elem in root.iter() if "trace" in elem.tag]

    def __get_all_events(self, root: ET.Element) -> List[ET.Element]:
        """
        Given the root element (should be trace), return a list of all event
        elements.

        Parameters:
            root -- Any trace element (`<trace>`) of an XES log file.

        Returns a list of event elements (`<event>`).
        """
        if "trace" not in root.tag:
            raise InvalidElementPassed(expected="trace", element=root.tag)
        return [elem for elem in root.iter() if "event" in elem.tag]

    # Parsing methods
    def __parse_with_ET(self, file: str, gzipped: bool = False) -> ET.ElementTree:
        """
        Parse the XML with ElementTree.
        Distinguishes between gzipped and normal format.

        Parameters:
            file -- The file to parse.
            gzipped -- Boolean indicating whether or not the file is gzipped.
                       False by default.

        Raises `ParsingError` when ElementTree cannot parse the content.
        """
        try:
            if gzipped:
                with gzip.open(file) as unzipped_file:
                    return ET.parse(unzipped_file)
            return ET.parse(file)
        except ET.ParseError as err:
            # Chain the cause so the XML error position stays visible.
            raise ParsingError(
                filepath=file, reason="Element Tree ParseError was raised."
            ) from err

    def __parse(self, file: str) -> ET.Element:
        """
        Parses a file (that is either .xes or .xes.gz) with ElementTree and
        returns the root element.

        Parameters:
            file -- The file to parse.

        Returns the root element (`<log>`) of an XES log.
        """
        # Only a trailing ".gz" marks a gzipped log; a ".gz" elsewhere in the
        # path (e.g. a directory name) must not switch on decompression.
        tree = self.__parse_with_ET(file, file.endswith(".gz"))
        return tree.getroot()

    # Transforming methods
    def __convert_trace(
        self, trace: ET.Element, mapping: Dict[str, int], file: str
    ) -> Tuple[int, ...]:
        """
        Converts a single trace element into a tuple of integers.

        Parameters:
            trace -- The trace element (`<trace>`) to convert.
            mapping -- The mapping that defines how to convert.
            file -- Path to file, used for logging.

        Returns a tuple of integers representing a trace according to some
        mapping.
        """
        # Check element tag
        if "trace" not in trace.tag:
            raise InvalidElementPassed(expected="trace", element=trace.tag)

        converted = []
        for event in self.__get_all_events(trace):
            key_element = event.find(".//*[@key='concept:name']")
            if key_element is None:
                # No concept:name below this event, cannot process this file.
                raise ParsingError(
                    filepath=file,
                    reason="Cannot find key elements while transforming traces.",
                )
            converted.append(mapping[key_element.attrib["value"]])
        return tuple(converted)

    def __make_log(
        self, root: ET.Element, mapping: Dict[str, int], file: str
    ) -> List[Tuple[int, ...]]:
        """
        Makes a log, given a root element and a mapping dictionary.

        Parameters:
            root -- the root element (`<log>`) of an XES log.
            mapping -- a mapping from key (XES concept:name) to integer.
            file -- Path to file, used for logging.

        Returns a transformed log.
        """
        # Check element tag
        if "log" not in root.tag:
            raise InvalidElementPassed(expected="log", element=root.tag)

        return [
            self.__convert_trace(trace, mapping, file)
            for trace in self.__get_all_traces(root)
        ]

    def transform(self, log: str) -> List[Tuple[int, ...]]:
        """
        Transforms a XES log into integer sequences.

        Parameters:
            log -- A logfile to transform to integer sequences.

        Returns a transformed log.
        """
        # Check the log
        self.__check_log(log)
        # Parse the log if possible
        root = self.__parse(log)
        # Build the name mapping
        name_mapping = self.__build_mapping(root, log)
        # Build the log
        return self.__make_log(root, name_mapping, log)
# Next documented section: Instance variables
var readable_exts-
All extensions that this transformator can handle.
Methods
def _XESTransformator__build_mapping(self, root: xml.etree.ElementTree.Element, file: str) ‑> Dict[str, int]-
Builds a mapping from key (XES concept:name) to integer.
Parameters
root – The root element (`<log>`) of an XES log file.
file – Path to the file, used for reporting errors.
Returns a dictionary mapping a string (key) to an integer.
Expand source code
def __build_mapping(self, root: ET.Element, file: str) -> Dict[str, int]:
    """
    Builds a mapping from key (XES concept:name) to integer.

    Parameters:
        root -- The root element (`<log>`) of an XES log file.
        file -- Path to the file, used for reporting errors.

    Raises `InvalidElementPassed` when `root` is not a log element.
    Raises `ParsingError` when the log has no
    'meta_concept:named_events_total' element.

    Returns a dictionary mapping a string (key) to an integer.
    """
    # Check element tag
    if "log" not in root.tag:
        raise InvalidElementPassed(expected="log", element=root.tag)

    # Retrieve element by XPath
    dictionary_root_field = root.find(
        ".//*[@key='meta_concept:named_events_total']"
    )
    if dictionary_root_field is None:
        # The two message halves used to be joined without a space
        # ("is notpresent").
        raise ParsingError(
            filepath=file,
            reason="'meta_concept:named_events_total' is not "
            "present in the log. Does your log adhere to the OpenXES standard?",
        )

    # iter() yields the field itself first (index 0), so the keys below it
    # are numbered from 1 in document order.
    return {
        element.attrib["key"]: index
        for index, element in enumerate(dictionary_root_field.iter())
        if element is not dictionary_root_field
    }
# Next documented member: def _XESTransformator__check_log(self, file: str) -> NoneType
Performs some checks on the file and raises exceptions when something is wrong.
In particular: Check 0: Check if it is a file. Check 1: Check if the file is readable. Check 2: Check if the extension is one that we can parse.
Parameters
file – The file (as string) to check. Can be relative or absolute path.
Raises `FileNotFoundError` if check 0 fails. Raises `PermissionError` if check 1 fails. Raises `InvalidLogFormat` if check 2 fails.
Expand source code
def __check_log(self, file: str) -> None:
    """
    Performs some checks on the file and raises exceptions when something is
    wrong.

    In particular:
        Check 0: Check if it is a file.
        Check 1: Check if the file is readable.
        Check 2: Check if the extension is one that we can parse.

    Parameters:
        file -- The file (as string) to check. Can be relative or absolute
                path.

    Raises `FileNotFoundError` if check 0 fails.
    Raises `PermissionError` if check 1 fails.
    Raises `InvalidLogFormat` if check 2 fails.
    """
    # Check 0: Did we get a file?
    if not isfile(file):
        raise FileNotFoundError(file)

    # Check 1: Can we read the file?
    if not access(file, R_OK):
        raise PermissionError(file)

    # Check 2: Does the path END in a readable extension? A substring test
    # would also accept names such as "log.xes.bak".
    if not file.endswith(tuple(self.readable_exts)):
        raise InvalidLogFormat(filepath=file)
# Next documented member: def _XESTransformator__convert_trace(self, trace: xml.etree.ElementTree.Element, mapping: Dict[str, int], file: str) -> Tuple[int, ...]
Converts a single trace element into a tuple of integers.
Parameters
trace – The trace element (`<trace>`) to convert.
mapping – The mapping that defines how to convert.
file – Path to file, used for logging.
Returns a tuple of integers representing a trace according to some mapping.
Expand source code
def __convert_trace(
    self, trace: ET.Element, mapping: Dict[str, int], file: str
) -> Tuple[int, ...]:
    """
    Converts a single trace element into a tuple of integers.

    Parameters:
        trace -- The trace element (`<trace>`) to convert.
        mapping -- The mapping that defines how to convert.
        file -- Path to file, used for logging.

    Raises `InvalidElementPassed` when `trace` is not a trace element.
    Raises `ParsingError` when an event has no 'concept:name' element.

    Returns a tuple of integers representing a trace according to some
    mapping.
    """
    # Check element tag
    if "trace" not in trace.tag:
        raise InvalidElementPassed(expected="trace", element=trace.tag)

    numbers = []
    for event in self.__get_all_events(trace):
        name_node = event.find(".//*[@key='concept:name']")
        if name_node is None:
            # Without a concept:name the event cannot be mapped.
            raise ParsingError(
                filepath=file,
                reason="Cannot find key elements while transforming traces.",
            )
        event_name = name_node.attrib["value"]
        numbers.append(mapping[event_name])
    return tuple(numbers)
# Next documented member: def _XESTransformator__get_all_events(self, root: xml.etree.ElementTree.Element) -> List[xml.etree.ElementTree.Element]
Given the root element (should be trace), return a list of all event elements.
Parameters
root – Any trace element (`<trace>`) of an XES log file.
Returns a list of event elements (`<event>`).
Expand source code
def __get_all_events(self, root: ET.Element) -> List[ET.Element]:
    """
    Given the root element (should be trace), return a list of all event
    elements.

    Parameters:
        root -- Any trace element (`<trace>`) of an XES log file.

    Raises `InvalidElementPassed` when the tag of `root` does not contain
    "trace".

    Returns a list of event elements (`<event>`).
    """
    if "trace" in root.tag:
        # Tags are matched by substring; iter() walks the whole subtree.
        return [node for node in root.iter() if "event" in node.tag]
    raise InvalidElementPassed(expected="trace", element=root.tag)
# Next documented member: def _XESTransformator__get_all_traces(self, root: xml.etree.ElementTree.Element) -> List[xml.etree.ElementTree.Element]
Given the root element (should be log), return a list of all trace elements.
Parameters
root – The root element (`<log>`) of an XES log file.
Returns a list of trace elements (`<trace>`).
Expand source code
def __get_all_traces(self, root: ET.Element) -> List[ET.Element]:
    """
    Given the root element (should be log), return a list of all trace
    elements.

    Parameters:
        root -- The root element (`<log>`) of an XES log file.

    Raises `InvalidElementPassed` when the tag of `root` does not contain
    "log".

    Returns a list of trace elements (`<trace>`).
    """
    if "log" in root.tag:
        # Tags are matched by substring; iter() walks the whole subtree.
        return [node for node in root.iter() if "trace" in node.tag]
    raise InvalidElementPassed(expected="log", element=root.tag)
# Next documented member: def _XESTransformator__make_log(self, root: xml.etree.ElementTree.Element, mapping: Dict[str, int], file: str) -> List[Tuple[int, ...]]
Makes a log, given a root element and a mapping dictionary.
Parameters
root – The root element (`<log>`) of an XES log.
mapping – A mapping from key (XES concept:name) to integer.
file – Path to file, used for logging.
Returns a transformed log.
Expand source code
def __make_log(
    self, root: ET.Element, mapping: Dict[str, int], file: str
) -> List[Tuple[int, ...]]:
    """
    Makes a log, given a root element and a mapping dictionary.

    Parameters:
        root -- the root element (`<log>`) of an XES log.
        mapping -- a mapping from key (XES concept:name) to integer.
        file -- Path to file, used for logging.

    Raises `InvalidElementPassed` when `root` is not a log element.

    Returns a transformed log: one converted tuple per trace element.
    """
    # Check element tag
    if "log" not in root.tag:
        raise InvalidElementPassed(expected="log", element=root.tag)

    return [
        self.__convert_trace(trace, mapping, file)
        for trace in self.__get_all_traces(root)
    ]
# Next documented member: def _XESTransformator__parse(self, file: str) -> xml.etree.ElementTree.Element
Parses a file (that is either .xes or .xes.gz) with ElementTree and returns the root element.
Parameters
file – The file to parse.
Returns the root element (`<log>`) of an XES log.
Expand source code
def __parse(self, file: str) -> ET.Element:
    """
    Parses a file (that is either .xes or .xes.gz) with ElementTree and
    returns the root element.

    Parameters:
        file -- The file to parse.

    Returns the root element (`<log>`) of an XES log.
    """
    # Only a trailing ".gz" marks a gzipped log; a ".gz" elsewhere in the
    # path (e.g. a directory name) must not switch on decompression.
    tree = self.__parse_with_ET(file, file.endswith(".gz"))
    return tree.getroot()
# Next documented member: def _XESTransformator__parse_with_ET(self, file: str, gzipped: bool = False) -> xml.etree.ElementTree.ElementTree
Parse the XML with ElementTree Distinguishes between gzipped and normal format.
Parameters
file – The file to parse. gzipped – Boolean indicating whether or not the file is gzipped. False by default.
Expand source code
def __parse_with_ET(self, file: str, gzipped: bool = False) -> ET.ElementTree:
    """
    Parse the XML with ElementTree.
    Distinguishes between gzipped and normal format.

    Parameters:
        file -- The file to parse.
        gzipped -- Boolean indicating whether or not the file is gzipped.
                   False by default.

    Raises `ParsingError` when ElementTree cannot parse the content.

    Returns the parsed element tree.
    """
    try:
        if gzipped:
            # Decompress on the fly; the handle is closed once parsing ends.
            with gzip.open(file) as unzipped_file:
                return ET.parse(unzipped_file)
        return ET.parse(file)
    except ET.ParseError as err:
        # Chain the cause so the XML error position stays visible.
        raise ParsingError(
            filepath=file, reason="Element Tree ParseError was raised."
        ) from err
# Next documented member: def transform(self, log: str) -> List[Tuple[int, ...]]
Transforms a XES log into integer sequences.
Parameters
log – A logfile to transform to integer sequences.
Returns a transformed log.
Expand source code
def transform(self, log: str) -> List[Tuple[int, ...]]:
    """
    Transforms a XES log into integer sequences.

    Parameters:
        log -- A logfile to transform to integer sequences.

    Returns a transformed log.
    """
    # Validate first so an unusable file is rejected before parsing starts.
    self.__check_log(log)
    root = self.__parse(log)
    # The mapping is built from the parsed log and then applied to it.
    return self.__make_log(root, self.__build_mapping(root, log), log)