Module integer_sequences.generate_datasets

Expand source code
# ! /home/dan/miniconda3/bin/conda "run -n internship python"
# -*- coding: utf-8 -*-
from typing import List, Tuple, Dict
from os import listdir
from os.path import join


# Own
from generator import SequenceGenerator, XESTransformator
from helper import DATAPATH, dumps, LOGPATH


def transform() -> None:
    """
    Transforms every XES log found in `DATAPATH` and dumps each result to `LOGPATH`.

    A directory entry is picked up when its name contains ".xes"; the dumped
    log is named after the entry with "-transformed" appended.

    Raises an OSError when it could not successfully save a transformed log.
    """
    directory_entries = listdir(DATAPATH)
    transformator = XESTransformator()

    for entry in directory_entries:
        # Loose substring check for the extension; per the original author's
        # note, the Transformator class does the proper check itself.
        if ".xes" not in entry:
            continue

        print(f"Attempting to transform '{entry}' with '{transformator}'...")
        transformed = transformator.transform(join(DATAPATH, entry))

        # `dumps` reports failure through a falsy return value
        if not dumps(join(LOGPATH, f"{entry}-transformed"), transformed):
            raise OSError("Failed to dump data to disk..")
        print("Success!")


def generate() -> None:
    """
    Generates datasets with sequence generators. Refer to `_generation_configurations` for all used generation configs.

    Every configuration is passed to `SequenceGenerator.generate_log` and the
    resulting log is dumped to `LOGPATH` under the name `<key>-<case_name>`.

    Raises an OSError when it could not successfully save the generated log.
    """
    configurations_to_use = _generation_configurations()

    generator = SequenceGenerator(wanted_length=100)
    for key, case_name, args in configurations_to_use:
        generated_log = generator.generate_log(key, **args)
        # `dumps` reports failure through a falsy return value
        if not dumps(join(LOGPATH, f"{key}-{case_name}"), generated_log):
            raise OSError("Failed to dump data to disk..")
        print(f"Success: '{key}-{case_name}'")


def _generation_configurations() -> List[Tuple[str, str, Dict[str, List[int]]]]:
    """
    Builds all configurations used to generate the datasets.

    Each entry is a `(key, case_name, args)` tuple: `key` and `args` are passed
    to `SequenceGenerator.generate_log`, and `key` together with `case_name`
    forms the name of the dumped log.

    The list holds 1419 configurations: 8 fib/pascal/recaman/catalan,
    200 range, 11 long term, 1100 long term single dependency and
    100 short term single dependency ones.
    Most of them are very similar, and do not need to be used in analysis.
    """
    # Shorthand for a bunch of numbers to be used in configs below
    # Exclude 0 for multiplication issues
    some_numbers = list(range(1, 101))

    # Keyword names of the five leading elements, in the order they are passed
    positions = ("firsts", "seconds", "thirds", "fourths", "fifths")

    def one_varying(varying: str, fixed: int) -> Dict[str, List[int]]:
        # `varying` ranges over some_numbers, all other positions are pinned to `fixed`
        return {
            position: some_numbers if position == varying else [fixed]
            for position in positions
        }

    def all_varying() -> Dict[str, List[int]]:
        # Cannot go up to 100 - memory error on laptop (could have been expected)
        # Instead, go up to 10
        return {position: list(range(1, 11)) for position in positions}

    configurations: List[Tuple[str, str, Dict[str, List[int]]]] = [
        # Fib, fixed 0 first element, random seconds
        ("fib", "fixed0-random", {"firsts": [0], "seconds": some_numbers}),
        # Fib, fixed 0 second element, random firsts
        ("fib", "random-fixed0", {"firsts": some_numbers, "seconds": [0]}),
        # Fib, fixed 1 first element, random seconds
        ("fib", "fixed1-random", {"firsts": [1], "seconds": some_numbers}),
        # Fib, fixed 1 second element, random firsts
        ("fib", "random-fixed1", {"firsts": some_numbers, "seconds": [1]}),
        # Fib, generic, random all
        ("fib", "generic", {"firsts": some_numbers, "seconds": some_numbers}),
        # Pascal, no cases
        ("pascal", "", {"firsts": some_numbers}),
        # Recaman, no cases
        ("recaman", "", {"firsts": some_numbers}),
        # Catalan, no cases
        ("catalan", "", {"firsts": some_numbers}),
    ]

    # Counting up from various, identical step sizes
    configurations += [
        ("range_up", f"step-{step}", {"firsts": some_numbers, "steps": [step]})
        for step in some_numbers
    ]
    # Counting down to various, identical step sizes
    configurations += [
        ("range_down", f"step-{step}", {"lasts": some_numbers, "steps": [step]})
        for step in some_numbers
    ]

    # Various long term configs: one varying position, the rest fixed at 0, then at 1
    for fixed in (0, 1):
        for varying in positions:
            configurations.append(
                (
                    "long_term_dependency",
                    f"{varying}{fixed}",
                    one_varying(varying, fixed),
                )
            )
    configurations.append(("long_term_dependency", "generic", all_varying()))

    # Various long term single dependency logs: same cases, once per constant
    for fixed in (0, 1):
        for varying in positions:
            configurations += [
                (
                    "long_term_single_dependency",
                    f"{varying}{fixed}-const{const}",
                    {**one_varying(varying, fixed), "constants": [const]},
                )
                for const in some_numbers
            ]
    # Same issue, go only through lists of 10
    configurations += [
        (
            "long_term_single_dependency",
            f"generic-const{const}",
            {**all_varying(), "constants": [const]},
        )
        for const in some_numbers
    ]

    # Short term configurations
    configurations += [
        (
            "short_term_single_dependency",
            f"const-{const}",
            {"firsts": some_numbers, "constants": [const]},
        )
        for const in some_numbers
    ]

    return configurations


def generate_datasets() -> None:
    """
    Generates all datasets: first the ones transformed from XES logs, then
    the ones produced by the sequence generators.
    """
    stages = (
        # Datasets from XES
        ("Generating datasets from XES...", transform),
        # Datasets from generators
        ("Generating datasets with generators...", generate),
    )

    for announcement, run_stage in stages:
        print(announcement)
        run_stage()
        print("Done!")


if __name__ == "__main__":
    # Run as a script: generate every dataset.
    generate_datasets()

else:
    # Importing this file is only expected while generating documentation,
    # so print a reminder for anyone importing it for another reason.
    # Adjacent literals are used instead of a backslash continuation inside
    # the string, which would embed the next line's indentation in the message.
    print(
        "File (generate_datasets.py) imported. If this isn't to generate documentation,"
        " what are you doing?"
    )

Functions

def generate() ‑> NoneType

Generates datasets with sequence generators. Refer to configurations_to_use for all used generation configs.

Raises an OSError when it could not successfully save the generated log.

Expand source code
def generate() -> None:
    """
    Generates datasets with sequence generators. Refer to `_generation_configurations` for all used generation configs.

    Every configuration is passed to `SequenceGenerator.generate_log` and the
    resulting log is dumped to `LOGPATH` under the name `<key>-<case_name>`.

    Raises an OSError when it could not successfully save the generated log.
    """
    configurations_to_use = _generation_configurations()

    generator = SequenceGenerator(wanted_length=100)
    for key, case_name, args in configurations_to_use:
        generated_log = generator.generate_log(key, **args)
        # `dumps` reports failure through a falsy return value
        if not dumps(join(LOGPATH, f"{key}-{case_name}"), generated_log):
            raise OSError("Failed to dump data to disk..")
        print(f"Success: '{key}-{case_name}'")


def _generation_configurations() -> List[Tuple[str, str, Dict[str, List[int]]]]:
    """
    Builds all configurations used to generate the datasets.

    Each entry is a `(key, case_name, args)` tuple: `key` and `args` are passed
    to `SequenceGenerator.generate_log`, and `key` together with `case_name`
    forms the name of the dumped log.

    The list holds 1419 configurations: 8 fib/pascal/recaman/catalan,
    200 range, 11 long term, 1100 long term single dependency and
    100 short term single dependency ones.
    Most of them are very similar, and do not need to be used in analysis.
    """
    # Shorthand for a bunch of numbers to be used in configs below
    # Exclude 0 for multiplication issues
    some_numbers = list(range(1, 101))

    # Keyword names of the five leading elements, in the order they are passed
    positions = ("firsts", "seconds", "thirds", "fourths", "fifths")

    def one_varying(varying: str, fixed: int) -> Dict[str, List[int]]:
        # `varying` ranges over some_numbers, all other positions are pinned to `fixed`
        return {
            position: some_numbers if position == varying else [fixed]
            for position in positions
        }

    def all_varying() -> Dict[str, List[int]]:
        # Cannot go up to 100 - memory error on laptop (could have been expected)
        # Instead, go up to 10
        return {position: list(range(1, 11)) for position in positions}

    configurations: List[Tuple[str, str, Dict[str, List[int]]]] = [
        # Fib, fixed 0 first element, random seconds
        ("fib", "fixed0-random", {"firsts": [0], "seconds": some_numbers}),
        # Fib, fixed 0 second element, random firsts
        ("fib", "random-fixed0", {"firsts": some_numbers, "seconds": [0]}),
        # Fib, fixed 1 first element, random seconds
        ("fib", "fixed1-random", {"firsts": [1], "seconds": some_numbers}),
        # Fib, fixed 1 second element, random firsts
        ("fib", "random-fixed1", {"firsts": some_numbers, "seconds": [1]}),
        # Fib, generic, random all
        ("fib", "generic", {"firsts": some_numbers, "seconds": some_numbers}),
        # Pascal, no cases
        ("pascal", "", {"firsts": some_numbers}),
        # Recaman, no cases
        ("recaman", "", {"firsts": some_numbers}),
        # Catalan, no cases
        ("catalan", "", {"firsts": some_numbers}),
    ]

    # Counting up from various, identical step sizes
    configurations += [
        ("range_up", f"step-{step}", {"firsts": some_numbers, "steps": [step]})
        for step in some_numbers
    ]
    # Counting down to various, identical step sizes
    configurations += [
        ("range_down", f"step-{step}", {"lasts": some_numbers, "steps": [step]})
        for step in some_numbers
    ]

    # Various long term configs: one varying position, the rest fixed at 0, then at 1
    for fixed in (0, 1):
        for varying in positions:
            configurations.append(
                (
                    "long_term_dependency",
                    f"{varying}{fixed}",
                    one_varying(varying, fixed),
                )
            )
    configurations.append(("long_term_dependency", "generic", all_varying()))

    # Various long term single dependency logs: same cases, once per constant
    for fixed in (0, 1):
        for varying in positions:
            configurations += [
                (
                    "long_term_single_dependency",
                    f"{varying}{fixed}-const{const}",
                    {**one_varying(varying, fixed), "constants": [const]},
                )
                for const in some_numbers
            ]
    # Same issue, go only through lists of 10
    configurations += [
        (
            "long_term_single_dependency",
            f"generic-const{const}",
            {**all_varying(), "constants": [const]},
        )
        for const in some_numbers
    ]

    # Short term configurations
    configurations += [
        (
            "short_term_single_dependency",
            f"const-{const}",
            {"firsts": some_numbers, "constants": [const]},
        )
        for const in some_numbers
    ]

    return configurations
def generate_datasets() ‑> NoneType

Generates all datasets.

Expand source code
def generate_datasets() -> None:
    """
    Generates all datasets: first the ones transformed from XES logs, then
    the ones produced by the sequence generators.
    """
    stages = (
        # Datasets from XES
        ("Generating datasets from XES...", transform),
        # Datasets from generators
        ("Generating datasets with generators...", generate),
    )

    for announcement, run_stage in stages:
        print(announcement)
        run_stage()
        print("Done!")
def transform() ‑> NoneType

Generates datasets by transforming XES logs.

Raises an OSError when it could not successfully save the generated log.

Expand source code
def transform() -> None:
    """
    Transforms every XES log found in `DATAPATH` and dumps each result to `LOGPATH`.

    A directory entry is picked up when its name contains ".xes"; the dumped
    log is named after the entry with "-transformed" appended.

    Raises an OSError when it could not successfully save a transformed log.
    """
    directory_entries = listdir(DATAPATH)
    transformator = XESTransformator()

    for entry in directory_entries:
        # Loose substring check for the extension; per the original author's
        # note, the Transformator class does the proper check itself.
        if ".xes" not in entry:
            continue

        print(f"Attempting to transform '{entry}' with '{transformator}'...")
        transformed = transformator.transform(join(DATAPATH, entry))

        # `dumps` reports failure through a falsy return value
        if not dumps(join(LOGPATH, f"{entry}-transformed"), transformed):
            raise OSError("Failed to dump data to disk..")
        print("Success!")