TestModel

This module contains tests that evaluate model performance on the anime and manga datasets. It includes fixtures and test functions that ensure models and embeddings load correctly, similarities are calculated between new descriptions and existing content, and evaluation results are saved with the expected structure.

The tests verify:
  • Model and embedding loading functionality
  • Similarity calculation between new descriptions and existing content
  • Proper saving and structure of evaluation results
  • Consistent behavior across both anime and manga datasets
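
For orientation, a single saved evaluation record that these tests assert on has roughly the following shape. This is a sketch inferred from the assertions in this module; the field values are illustrative, not real output.

# Sketch of one evaluation record, inferred from the assertions below.
# The results file holds a JSON list of such dicts; values are illustrative.
record = {
    "model_name": "example-model",      # identifier of the model under test
    "dataset_type": "anime",            # "anime" or "manga"
    "new_description": "The main character is a 37 year old man ...",
    "top_similarities": [               # exactly top_n entries
        {"title": "...", "synopsis": "...", "similarity": 0.87},
        # ...four more entries when top_n == 5
    ],
}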

new_description

new_description() -> str

Fixture that provides a new description for testing similarity calculations.

The description represents a common isekai anime/manga plot to test against the datasets.

Returns:
    str: A test description about a character being reborn in another world as a slime.

Source code in tests/test_model.py
@pytest.fixture
def new_description() -> str:
    """
    Fixture that provides a new description for testing similarity calculations.

    The description represents a common isekai anime/manga plot to test against the datasets.

    Returns:
        str: A test description about a character being reborn in another world as a slime.
    """
    return (
        "The main character is a 37 year old man who is stabbed and dies, "
        "but is reborn as a slime in a different world."
    )
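
Both test functions also receive a model_name fixture that is defined outside this module. A minimal sketch of how such a fixture might be supplied from a shared conftest.py, assuming the suite is parametrized over model identifiers (the model id shown is an assumption, not the project's actual configuration):

import pytest

# Hypothetical conftest.py fixture -- the real definition may differ.
@pytest.fixture(params=["sentence-transformers/all-MiniLM-L6-v2"])  # assumed model id
def model_name(request: pytest.FixtureRequest) -> str:
    """Yield each model identifier under test to the model tests."""
    return request.param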

test_anime_model

test_anime_model(new_description: str, model_name: str) -> None

Test the anime model's ability to find similar content based on description.

This test verifies:
  1. Proper loading of the model and anime embeddings
  2. Accurate calculation of similarities between new description and existing anime
  3. Correct structure and saving of evaluation results
  4. Expected number and format of top similar results

Parameters:
    new_description (str): A test description to compare against the anime database.
    model_name (str): The identifier of the model being tested.

Raises:
    AssertionError: If any of the test conditions fail, including file existence, data structure, or expected result format.

Source code in tests/test_model.py
@pytest.mark.order(11)
def test_anime_model(new_description: str, model_name: str) -> None:  # pylint: disable=redefined-outer-name
    """
    Test the anime model's ability to find similar content based on description.

    This test verifies:
        1. Proper loading of the model and anime embeddings
        2. Accurate calculation of similarities between new description and existing anime
        3. Correct structure and saving of evaluation results
        4. Expected number and format of top similar results

    Args:
        new_description (str): A test description to compare against the anime database.
        model_name (str): The identifier of the model being tested.

    Raises:
        AssertionError: If any of the test conditions fail, including file existence,
                       data structure, or expected result format.
    """
    dataset_type = "anime"
    top_n = 5

    model, df, synopsis_columns, embeddings_save_dir = load_model_and_embeddings(
        model_name, dataset_type
    )
    top_results: List[Dict[str, Any]] = calculate_similarities(
        model, df, synopsis_columns, embeddings_save_dir, new_description, top_n
    )

    assert len(top_results) == top_n
    for result in top_results:
        assert "title" in result
        assert "synopsis" in result
        assert "similarity" in result

    evaluation_results = save_evaluation_results(
        "./model/evaluation_results_anime.json",
        model_name,
        dataset_type,
        new_description,
        top_results,
    )
    assert os.path.exists(evaluation_results)
    with open(evaluation_results, "r", encoding="utf-8") as f:
        evaluation_data = json.load(f)
    assert len(evaluation_data) > 0
    assert isinstance(evaluation_data, list), "evaluation_results should be a list"
    assert isinstance(
        evaluation_data[-1], dict
    ), "Last item in evaluation_results should be a dictionary"
    assert "model_name" in evaluation_data[-1]
    assert "dataset_type" in evaluation_data[-1]
    assert "new_description" in evaluation_data[-1]
    assert len(evaluation_data[-1]["top_similarities"]) == top_n
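
calculate_similarities is imported from the project's model code and is not shown on this page. A minimal sketch of the underlying idea, assuming sentence-transformer embeddings compared by cosine similarity (the function name and arguments here are illustrative, and the real implementation presumably loads precomputed embeddings from embeddings_save_dir rather than re-encoding the corpus):

import numpy as np
from sentence_transformers import SentenceTransformer

def top_similar(model, titles, synopses, query, top_n=5):
    """Rank stored synopses by cosine similarity to a query description."""
    # Normalized embeddings make the dot product equal to cosine similarity.
    corpus = model.encode(synopses, normalize_embeddings=True)
    q = model.encode(query, normalize_embeddings=True)
    sims = corpus @ q                            # one similarity per synopsis
    order = np.argsort(sims)[::-1][:top_n]       # indices of the best matches
    return [
        {"title": titles[i], "synopsis": synopses[i], "similarity": float(sims[i])}
        for i in order
    ]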

test_manga_model

test_manga_model(new_description: str, model_name: str) -> None

Test the manga model's ability to find similar content based on description.

This test verifies:
  1. Proper loading of the model and manga embeddings
  2. Accurate calculation of similarities between new description and existing manga
  3. Correct structure and saving of evaluation results
  4. Expected number and format of top similar results

Parameters:
    new_description (str): A test description to compare against the manga database.
    model_name (str): The identifier of the model being tested.

Raises:
    AssertionError: If any of the test conditions fail, including file existence, data structure, or expected result format.

Source code in tests/test_model.py
@pytest.mark.order(12)
def test_manga_model(new_description: str, model_name: str) -> None:  # pylint: disable=redefined-outer-name
    """
    Test the manga model's ability to find similar content based on description.

    This test verifies:
        1. Proper loading of the model and manga embeddings
        2. Accurate calculation of similarities between new description and existing manga
        3. Correct structure and saving of evaluation results
        4. Expected number and format of top similar results

    Args:
        new_description (str): A test description to compare against the manga database.
        model_name (str): The identifier of the model being tested.

    Raises:
        AssertionError: If any of the test conditions fail, including file existence,
                       data structure, or expected result format.
    """
    dataset_type = "manga"
    top_n = 5

    model, df, synopsis_columns, embeddings_save_dir = load_model_and_embeddings(
        model_name, dataset_type
    )
    top_results: List[Dict[str, Any]] = calculate_similarities(
        model, df, synopsis_columns, embeddings_save_dir, new_description, top_n
    )

    assert len(top_results) == top_n
    for result in top_results:
        assert "title" in result
        assert "synopsis" in result
        assert "similarity" in result

    evaluation_results = save_evaluation_results(
        "./model/evaluation_results_manga.json",
        model_name,
        dataset_type,
        new_description,
        top_results,
    )
    assert os.path.exists(evaluation_results)
    with open(evaluation_results, "r", encoding="utf-8") as f:
        evaluation_data = json.load(f)
    assert len(evaluation_data) > 0
    assert isinstance(evaluation_data, list), "evaluation_results should be a list"
    assert isinstance(
        evaluation_data[-1], dict
    ), "Last item in evaluation_results should be a dictionary"
    assert "model_name" in evaluation_data[-1]
    assert "dataset_type" in evaluation_data[-1]
    assert "new_description" in evaluation_data[-1]
    assert len(evaluation_data[-1]["top_similarities"]) == top_n
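
save_evaluation_results is likewise imported from the project code. The assertions above imply append semantics: the file parses as a JSON list whose last element is the record just written, and the return value is treated as a file path. A sketch of one way that could work (the signature and return value follow the calls above; the body is illustrative, not the project's actual implementation):

import json
import os
from typing import Any, Dict, List

def save_evaluation_results(
    path: str,
    model_name: str,
    dataset_type: str,
    new_description: str,
    top_results: List[Dict[str, Any]],
) -> str:
    """Append one evaluation record to a JSON list on disk; return the path."""
    records: List[Dict[str, Any]] = []
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            records = json.load(f)  # previously saved records, if any
    records.append(
        {
            "model_name": model_name,
            "dataset_type": dataset_type,
            "new_description": new_description,
            "top_similarities": top_results,
        }
    )
    with open(path, "w", encoding="utf-8") as f:
        json.dump(records, f, ensure_ascii=False, indent=2)
    return path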