Source code for hifis_surveyval.models.question_collection

# hifis-surveyval
# Framework to help developing analysis scripts for the HIFIS Software survey.
#
# SPDX-FileCopyrightText: 2021 HIFIS Software <support@hifis.net>
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
This module contains classes to represent groups of survey questions.

These can be constructed from YAML through the YamlConstructable abstract
class.
"""
from typing import Dict, List, Set
from typing import Optional as typing_Optional
from typing import Union

from pandas import DataFrame, Series, concat
from schema import Optional, Schema

from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.models.mixins.yaml_constructable import (
    YamlConstructable,
    YamlDict,
)
from hifis_surveyval.models.question import Question
from hifis_surveyval.models.translated import Translated


[docs]class QuestionCollection(YamlConstructable, Identifiable): """ QuestionCollections group a set of questions into a common context. This kind of question has no answers by itself. These are to be found in the according sub-questions. """ token_ID = "id" token_LABEL = "label" token_TEXT = "text" token_QUESTIONS = "questions" schema = Schema( { token_ID: str, token_LABEL: str, token_TEXT: dict, Optional(token_QUESTIONS, default=[]): list, Optional(str): object, # catchall } )
[docs] def __init__( self, collection_id: str, text: Translated, label: str, questions: List[Question], ) -> None: """ Initialize an empty question collection. It is recommended to refer to from_yaml_dictionary() for constructing instances when parsing metadata. Args: collection_id: The unique ID that is to be assigned to the collection. text: A Translated object representing the text that describes the question collection. label: A short label that can be used in plotting to represent the question collection. questions: A list of questions that are contained within the question collection. """ super().__init__(collection_id) # Question Collections have no parents self._text: Translated = text self._label: str = label self._questions: Dict[str, Question] = { question.short_id: question for question in questions }
[docs] def question_for_id(self, question_short_id: str) -> Question: """ Obtain a question from the collection for a given short ID. Args: question_short_id: The short id used within the collection for this question. Returns: The question for the given ID. Raises: KeyError - if no question with the given ID did exist. """ return self._questions[question_short_id]
[docs] def remove_answers(self, participant_ids: Set[str]) -> None: """ Remove the answers by the specified participants. Args: participant_ids: The IDs of the participants whose answers are to be removed. Invalid IDs are ignored. """ for question in self._questions.values(): question.remove_answers(participant_ids)
[docs] def as_data_frame( self, exclude_labels: typing_Optional[Union[str, List[str]]] = None ) -> DataFrame: """ Gather the answers given to each question as a data frame. Args: exclude_labels: A short label or a list of short labels for questions that are to be excluded from the data frame. Returns: A pandas data frame with participants in the rows and the questions of this collection in the columns. The fields in the data frame then contain the answer to a question for a given participant. """ excluded = [] if isinstance(exclude_labels, str): excluded.append(exclude_labels) elif isinstance(exclude_labels, list): excluded.extend(exclude_labels) # Nothing to do in any other case question_series: List[Series] = [] for (label, question) in self._questions.items(): if label in excluded: continue question_series.append(question.as_series()) return concat(question_series, axis=1) # Note (0)
# Note (0) The series are joined row-wise, so each participant in the # row labels (aka indexes) is associated with each answer according to # the question (Question ID in the column labels). @staticmethod def _from_yaml_dictionary( yaml: YamlDict, **kwargs ) -> "QuestionCollection": """ Generate a new QuestionCollection-instance from YAML data. Args: yaml: A YAML dictionary describing the Question **kwargs: Only here to satisfy the inherited method signature. Returns: A new Question containing the provided data """ collection_id = yaml[QuestionCollection.token_ID] questions = [ Question.from_yaml_dictionary( yaml=question_yaml, parent_id=collection_id ) for question_yaml in yaml[QuestionCollection.token_QUESTIONS] ] text = Translated.from_yaml_dictionary( yaml[QuestionCollection.token_TEXT] ) return QuestionCollection( collection_id=collection_id, text=text, label=yaml[QuestionCollection.token_LABEL], questions=questions, )