Source code for datasources.models.quality

"""
This module contains models related to the quality assessment of data sources.
"""
import itertools

from django.db import models

from core.models import MAX_LENGTH_NAME
from .datasource import DataSource
from .metadata import MetadataField


class QualityRuleset(models.Model):
    """
    A named, versioned collection of quality levels used to grade a data source.

    Instances are callable: ``ruleset(datasource)`` returns the quality level
    reached by the data source.
    """
    class Meta:
        # A given name may appear once per version
        unique_together = (
            ('name', 'version'),
        )

    # Prevent template engine from trying to call the model
    do_not_call_in_templates = True

    #: Name of the ruleset
    name = models.CharField(
        max_length=MAX_LENGTH_NAME,
        blank=False, null=False
    )

    #: Short text identifier
    short_name = models.CharField(
        max_length=MAX_LENGTH_NAME,
        unique=True,
        blank=True, null=False
    )

    #: Ruleset version - distinguishes successive versions of the same set
    version = models.CharField(
        max_length=MAX_LENGTH_NAME,
        blank=False, null=False
    )

    def __call__(self, datasource: DataSource) -> int:
        """
        Evaluate a data source to get its quality level under this ruleset.

        Levels are tried in the order the ``levels`` relation yields them and
        evaluation stops at the first level that the data source fails.

        :param datasource: Data source to assess
        :return: ``level`` of the last level passed before the first failure,
                 or 0 if no level was passed
        """
        highest_passed = 0

        for quality_level in self.levels.all():
            if not quality_level(datasource):
                # First failure ends the assessment - later levels are not tried
                break

            highest_passed = quality_level.level

        return highest_passed

    def __str__(self):
        label = '{0} - {1}'.format(self.name, self.version)
        return label
class QualityLevel(models.Model):
    """
    A set of criteria that is required to grant a particular quality level.

    Instances are callable: ``level(datasource)`` reports whether the data
    source accumulates enough criterion weight to pass this level.
    """
    class Meta:
        unique_together = (('ruleset', 'level'),)
        ordering = ['level']

    # Prevent template engine from trying to call the model
    do_not_call_in_templates = True

    #: Which ruleset does this level belong to?
    ruleset = models.ForeignKey(QualityRuleset, related_name='levels',
                                on_delete=models.CASCADE,
                                blank=False, null=False)

    #: What level is this?
    level = models.PositiveSmallIntegerField(blank=False, null=False)

    #: Threshold that the summed criteria weights must reach (within tolerance) to pass this level.
    #: When left empty, the sum of all criteria weights of this level is used instead.
    threshold = models.FloatField(blank=True, null=True)

    def __call__(self, datasource: DataSource, rtol: float = 1e-3) -> bool:
        """
        Does a data source pass the criteria for this quality level?

        :param datasource: Data source to assess
        :param rtol: Relative tolerance to compare floating point threshold
        :return: Passes this quality level?
        """
        threshold = self.threshold
        if threshold is None:
            # No explicit threshold - require the full weight of every criterion
            threshold = self.criteria.aggregate(models.Sum('weight'))['weight__sum']

            if threshold is None:
                # Sum() aggregates to None when the level has no criteria at all;
                # treat that as a zero threshold rather than failing on None * float
                threshold = 0

        total = sum(criterion(datasource) for criterion in self.criteria.all())

        # Compare using relative tolerance to account for floating point error
        return total >= (threshold * (1 - rtol))

    def __str__(self):
        return '{0} - level {1}'.format(self.ruleset, self.level)
class QualityCriterion(models.Model):
    """
    One weighted requirement contributing towards a quality level.

    Instances are callable: ``criterion(datasource)`` returns the weight that
    the data source earns from this criterion.
    """
    class Meta:
        # A metadata field may be used at most once within a quality level
        unique_together = (
            ('quality_level', 'metadata_field'),
        )
        verbose_name_plural = 'quality criteria'

    # Prevent template engine from trying to call the model
    do_not_call_in_templates = True

    #: Which quality level does this criterion belong to?
    quality_level = models.ForeignKey(
        QualityLevel,
        related_name='criteria',
        on_delete=models.CASCADE,
        blank=False, null=False
    )

    #: Which metadata field represents this criterion?
    metadata_field = models.ForeignKey(
        MetadataField,
        related_name='+',
        on_delete=models.PROTECT,
        blank=False, null=False
    )

    #: What proportion of the quality level does this criterion provide?
    weight = models.FloatField(
        default=1,
        blank=False, null=False
    )

    def __call__(self, datasource: DataSource) -> float:
        """
        Weight earned by a data source for this criterion.

        The criterion is met when the data source has at least one metadata
        item whose field is this criterion's metadata field.

        :param datasource: Data source to assess
        :return: This criterion's weight if it is met, otherwise 0
        """
        matching_items = datasource.metadata_items.filter(field=self.metadata_field)

        if matching_items.exists():
            return self.weight

        return 0

    def __str__(self):
        label = '{0} - weight {1}'.format(self.metadata_field, self.weight)
        return label