Coverage for crunch/django/app/models.py: 97.79%
226 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-01 13:43 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-01 13:43 +0000
1from operator import mod
2from typing import List
3import re
4from typing import Type
5from django.db import models
6from django_extensions.db.fields import AutoSlugField
7from django.utils.text import slugify
8from django.urls import reverse
9from django.contrib.auth import get_user_model
10from django.utils.html import format_html
11from django.db.models import OuterRef, Subquery
12from mptt.models import MPTTModel, TreeForeignKey
13import humanize
14from polymorphic.models import PolymorphicModel
15from django_extensions.db.models import TimeStampedModel
16from next_prev import next_in_order, prev_in_order
17from polymorphic_tree.models import PolymorphicMPTTModel, PolymorphicTreeForeignKey
20from . import enums, storages
# The user model returned by Django's get_user_model(); referenced by Status.site_user below.
User = get_user_model()
def OptionalCharField(max_length=255, default="", blank=True, **kwargs):
    """
    Build a ``models.CharField`` that may be left empty.

    Args:
        max_length (int, optional): Maximum length of the field. Defaults to 255.
        default (str, optional): Default value of the field. Defaults to the empty string.
        blank (bool, optional): Whether the field may be left blank. Defaults to True.
        **kwargs: Any further options, forwarded unchanged to ``models.CharField``.

    Returns:
        models.CharField: The configured field.
    """
    options = dict(max_length=max_length, default=default, blank=blank, **kwargs)
    return models.CharField(**options)
class NextPrevMixin(models.Model):
    """
    Abstract mixin giving a model next/previous navigation helpers and an admin URL helper.
    """

    class Meta:
        abstract = True

    def next_in_order(self, **kwargs):
        """Calls the module-level ``next_in_order`` (from ``next_prev``) on this instance, forwarding ``kwargs``."""
        following = next_in_order(self, **kwargs)
        return following

    def prev_in_order(self, **kwargs):
        """Calls the module-level ``prev_in_order`` (from ``next_prev``) on this instance, forwarding ``kwargs``."""
        preceding = prev_in_order(self, **kwargs)
        return preceding

    def get_admin_url(self):
        """Returns the URL of the admin 'change' view for this instance."""
        # The view name is built from the app label and model name in the model's _meta.
        view_name = f"admin:{self._meta.app_label}_{self._meta.model_name}_change"
        return reverse(view_name, args=(self.pk,))
class Item(NextPrevMixin, TimeStampedModel, PolymorphicMPTTModel):
    """
    A general class of object which can be placed in a hierarchical tree.

    Each item can be viewed online with a details page and each item can be given any number of attributes to store metadata.
    """

    parent = PolymorphicTreeForeignKey(
        "self",
        blank=True,
        null=True,
        default=None,
        related_name="children",
        on_delete=models.SET_DEFAULT,
    )
    name = models.CharField(max_length=1023, unique=True)
    description = models.CharField(
        max_length=1023,
        default="",
        blank=True,
        help_text="A short description in a sentence or more of this item.",
    )
    details = models.TextField(
        default="",
        blank=True,
        help_text="A detailed description of this item (written in Markdown).",
    )
    slug = AutoSlugField(populate_from="name", unique=True, max_length=255)
    # TODO Add tags

    def slugify_function(self, content):
        """
        Slugifies ``content``, prefixing it with the parent's slug and a colon if this item has a parent.

        NOTE(review): presumably this is the hook picked up by ``AutoSlugField`` - confirm against django-extensions.
        """
        slug = slugify(content)
        if self.parent:
            return f"{self.parent.slug}:{slug}"
        return slug

    class Meta(PolymorphicMPTTModel.Meta):
        unique_together = ("parent", "slug")
        ordering = ("created", "pk")

    def __str__(self):
        return self.name

    def get_absolute_url(self):
        """Returns the URL of the detail page for this item."""
        return reverse("crunch:item-detail", kwargs={"slug": self.slug})

    def items(self):
        """Returns the direct children of this item in the tree."""
        return self.get_children()

    def descendant_attributes(
        self, attribute_type: Type = None, include_self: bool = True
    ) -> models.QuerySet:
        """Returns a queryset with all the attributes of the descendants of this item.

        Args:
            attribute_type (Type, optional): The type of the attribute to filter for. If `None` then it uses the `Attribute` class.
            include_self (bool, optional): Whether or not to include attributes of this item. Defaults to True.

        Returns:
            models.QuerySet: A queryset of attributes of items descended from this item.
        """
        attribute_type = attribute_type or Attribute
        return attribute_type.objects.filter(
            item__in=self.get_descendants(include_self=include_self)
        )

    def descendant_total_filesize(self) -> int:
        """
        Sums all the filesize attributes for this item and its descendants.

        Returns:
            int: The total sum of the filesize attributes of this item and all its descendants.
                If there are no filesize attributes then it returns None.
        """
        filesize_attributes = self.descendant_attributes(
            attribute_type=FilesizeAttribute, include_self=True
        )

        # ``Sum`` over an empty queryset already yields None, so there is no need to
        # evaluate (and instantiate) every attribute just to test for emptiness first.
        return filesize_attributes.aggregate(models.Sum("value"))["value__sum"]

    def descendant_total_filesize_readable(self) -> str:
        """
        Sums all the filesize attributes for this item and its descendants and converts it to a human readable string.

        Returns:
            str: The total sum of the filesize attributes of this item and all its descendants in a human readable string.
                If there are no filesize attributes then it returns 'None'.
        """
        descendant_total_filesize = self.descendant_total_filesize()

        # Compare against None explicitly: a genuine total of zero bytes is still a
        # valid size and must not be reported as if there were no filesize attributes.
        if descendant_total_filesize is not None:
            return humanize.naturalsize(descendant_total_filesize)

        return "None"

    def map(self):
        """Returns the result of ``item_map`` (from the sibling ``mapping`` module) for this item."""
        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from .mapping import item_map

        return item_map(self)

    def descendant_latlongattributes(self):
        """Returns a queryset of the ``LatLongAttribute`` objects of this item and all its descendants."""
        return self.descendant_attributes(
            attribute_type=LatLongAttribute, include_self=True
        )

    def has_descendant_latlongattributes(self):
        """Returns True if this item or any of its descendants has a ``LatLongAttribute``."""
        # ``exists()`` asks the database only whether a row is present instead of counting them all.
        return self.descendant_latlongattributes().exists()

    def reslugify_descendants(self):
        """
        Recomputes and saves the slug of this item and of every descendant.

        NOTE(review): a child's slug is derived from its parent's slug, so this assumes
        ``get_descendants`` yields ancestors before their descendants - confirm.
        """
        for item in self.get_descendants(include_self=True):
            item.slug = item.slugify_function(item.name)
            item.save()
class Project(Item):
    """
    An item which collects a number of datasets which should be run with the same workflow.

    Projects ought not to have parents in the tree structure of items.
    """

    workflow = models.TextField(
        default="",
        blank=True,
        help_text="URL to snakemake repository/shell script or its content.",
    )
    # More workflow languages need to be supported.
    # TODO assert parent is none

    def get_absolute_url(self):
        """Returns the URL of the detail page for this project."""
        url_kwargs = {"slug": self.slug}
        return reverse("crunch:project-detail", kwargs=url_kwargs)

    def unprocessed_datasets(self) -> models.QuerySet:
        """
        Returns a QuerySet of the children of this project that are in ``Dataset.unprocessed()``
        (i.e. datasets that are not complete and are not locked).
        """
        candidates = Dataset.unprocessed()
        return candidates.filter(id__in=self.items())

    def completed_datasets(self) -> models.QuerySet:
        """
        Returns a QuerySet of the children of this project that are in ``Dataset.completed()``.
        """
        candidates = Dataset.completed()
        return candidates.filter(id__in=self.items())

    def running_datasets(self) -> models.QuerySet:
        """
        Returns a QuerySet of the children of this project that are in ``Dataset.running()``.
        """
        candidates = Dataset.running()
        return candidates.filter(id__in=self.items())

    def failed_datasets(self) -> models.QuerySet:
        """
        Returns a QuerySet of the children of this project that are in ``Dataset.failed()``.
        """
        candidates = Dataset.failed()
        return candidates.filter(id__in=self.items())

    def next_unprocessed_dataset(self) -> "Dataset":
        """
        Returns the first unprocessed dataset in this project (the result of ``.first()`` on ``unprocessed_datasets()``).
        """
        pending = self.unprocessed_datasets()
        return pending.first()
class Dataset(Item):
    """
    An item which should be run once in a workflow.

    The parent of a dataset should be its project.
    """

    base_file_path = models.CharField(max_length=4096, default="", blank=True)
    locked = models.BooleanField(
        default=False,
        help_text="If the dataset is locked then it will not show up in the loop of available datasets.",
    )

    def save(self, *args, **kwargs):
        """
        Saves the dataset, filling in ``base_file_path`` with the storage default if it is empty.

        Raises:
            AssertionError: If the parent of this dataset is not a ``Project``.
        """
        assert isinstance(self.parent, Project)

        if not self.base_file_path:
            # NOTE(review): on the very first save ``self.slug`` may not have been populated
            # by the AutoSlugField yet - confirm that the default path is built as intended.
            self.base_file_path = storages.default_dataset_path(self.parent.slug, self.slug)
        return super().save(*args, **kwargs)

    def get_absolute_url(self) -> str:
        """
        Returns the URL of this dataset, built from the URL of its parent followed by ``datasets/<slug>``.

        NOTE(review): assumes the URL of the parent ends with a slash - confirm against the URL configuration.
        """
        return f"{self.parent.get_absolute_url()}datasets/{self.slug}"

    @classmethod
    def completed_ids(cls) -> models.QuerySet:
        """
        Returns the ids of all datasets which have a status with stage UPLOAD and state SUCCESS.

        The result is a flat ``values_list`` queryset of ids rather than a materialised list.
        """
        return Status.completed().values_list("dataset__id", flat=True)

    @classmethod
    def completed(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all datasets which have a status with stage UPLOAD and state SUCCESS.
        """
        return cls.objects.filter(id__in=cls.completed_ids())

    @classmethod
    def incomplete(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all datasets that are not complete (including unprocessed, running and failed datasets).
        """
        return cls.objects.exclude(id__in=cls.completed_ids())

    @classmethod
    def unprocessed(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all incomplete datasets that are not locked.
        """
        return cls.incomplete().filter(locked=False)

    @classmethod
    def inprocess(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all incomplete datasets that are locked (including running and failed datasets).
        """
        return cls.incomplete().filter(locked=True)

    @classmethod
    def has_status(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all datasets with at least one status.
        """
        statuses = Status.objects.filter(dataset=OuterRef("pk"))
        return cls.objects.filter(models.Exists(statuses))

    @classmethod
    def failed(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all incomplete locked datasets where the newest status has a state of FAIL.
        """
        # Statuses of the dataset in the outer query, most recently created first.
        newest_statuses = Status.objects.filter(dataset=OuterRef("pk")).order_by(
            "-created"
        )
        # Annotate each in-process dataset with the state of its newest status.
        annotated = cls.inprocess().annotate(
            newest_status_state=Subquery(newest_statuses.values("state")[:1])
        )
        return annotated.filter(newest_status_state=enums.State.FAIL)

    @classmethod
    def running(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all incomplete locked datasets that are not in ``failed()``,
        i.e. where the newest status does not have a state of FAIL.
        """
        return cls.inprocess().exclude(id__in=cls.failed())

    @classmethod
    def next_unprocessed(cls) -> "Dataset":
        """
        Returns the first unprocessed dataset (the result of ``.first()`` on ``unprocessed()``).
        """
        return cls.unprocessed().first()

    def files(self):
        """Returns the result of walking the storage at the base file path of this dataset."""
        return storages.storage_walk(self.base_file_path)

    def files_html(self):
        """
        Returns the files of this dataset rendered as HTML.

        This is deliberately best-effort: if walking the storage or rendering fails for any
        reason then a short HTML error message is returned instead of raising.
        """
        try:
            directory = storages.storage_walk(self.base_file_path)
            return directory.render_html()
        except Exception:
            # The string form of the dataset is its name, so it is escaped here
            # before being placed into the HTML of the fallback message.
            return format_html("<p>Failed to read files in dataset {}</p>", self)
class Status(NextPrevMixin, TimeStampedModel):
    """
    A record of the stage and state of a dataset, together with diagnostic information about the agent.

    Saving a status locks its dataset if it is not already locked.
    """

    dataset = models.ForeignKey(
        Dataset, on_delete=models.CASCADE, related_name="statuses"
    )
    site_user = models.ForeignKey(
        User, on_delete=models.SET_DEFAULT, default=None, blank=True, null=True
    )
    stage = models.IntegerField(choices=enums.Stage.choices)
    state = models.IntegerField(choices=enums.State.choices)
    note = models.TextField(default="", blank=True)
    # Diagnostic info
    agent_user = OptionalCharField(
        help_text="The name of the user running the agent (see https://docs.python.org/3/library/getpass.html)."
    )
    version = OptionalCharField(
        help_text="The django-crunch version number of the agent."
    )
    revision = OptionalCharField(
        help_text="The django-crunch git revision hash of the agent."
    )
    # terminal = OptionalCharField(help_text="the tty or pseudo-tty associated with the agent user (see https://psutil.readthedocs.io/en/latest/).")
    system = OptionalCharField(
        help_text="Returns the system/OS name, such as 'Linux', 'Darwin', 'Java', 'Windows' (see https://docs.python.org/3/library/platform.html)."
    )
    system_release = OptionalCharField(
        help_text="Returns the system’s release, e.g. '2.2.0' or 'NT' (see https://docs.python.org/3/library/platform.html)."
    )
    system_version = OptionalCharField(
        help_text="Returns the system’s release version, e.g. '#3 on degas' (see https://docs.python.org/3/library/platform.html)."
    )
    machine = OptionalCharField(
        help_text="Returns the machine type, e.g. 'i386' (see https://docs.python.org/3/library/platform.html)."
    )
    hostname = OptionalCharField(
        help_text="The hostname of the machine where the agent was running (see https://docs.python.org/3/library/socket.html)."
    )
    ip_address = OptionalCharField(
        help_text="The hostname in IPv4 address format (see https://docs.python.org/3/library/socket.html)."
    )
    mac_address = OptionalCharField(
        help_text="The hardware address (see https://docs.python.org/3/library/uuid.html)."
    )
    memory_total = models.BigIntegerField(
        default=None,
        blank=True,
        null=True,
        help_text="See https://psutil.readthedocs.io/en/latest/",
    )
    memory_free = models.BigIntegerField(
        default=None,
        blank=True,
        null=True,
        help_text="See https://psutil.readthedocs.io/en/latest/",
    )
    disk_total = models.BigIntegerField(
        default=None,
        blank=True,
        null=True,
        help_text="See https://psutil.readthedocs.io/en/latest/",
    )
    disk_free = models.BigIntegerField(
        default=None,
        blank=True,
        null=True,
        help_text="See https://psutil.readthedocs.io/en/latest/",
    )

    class Meta:
        verbose_name_plural = "statuses"

    def __str__(self):
        return f"{self.dataset}: {self.get_stage_display()} {self.get_state_display()}"

    def save(self, *args, **kwargs):
        """
        Saves the status, first locking (and saving) its dataset if it is not already locked.

        Raises:
            AssertionError: If ``self.dataset`` is not a ``Dataset``.
        """

        # Lock dataset if necessary
        assert isinstance(self.dataset, Dataset)
        if not self.dataset.locked:
            self.dataset.locked = True
            self.dataset.save()

        super().save(*args, **kwargs)

    @classmethod
    def completed(cls):
        """
        Returns a QuerySet of all statuses with stage UPLOAD and state SUCCESS.
        """
        return Status.objects.filter(
            stage=enums.Stage.UPLOAD, state=enums.State.SUCCESS
        )
class Attribute(NextPrevMixin, TimeStampedModel, PolymorphicModel):
    """
    A piece of metadata attached to an item under a key.

    Subclasses provide the stored value and must implement ``value_str``.
    """

    item = models.ForeignKey(Item, on_delete=models.CASCADE, related_name="attributes")
    key = models.CharField(max_length=255)

    def value_dict(self):
        """Returns a dictionary describing this attribute (here only its key)."""
        return {"key": self.key}

    def value_str(self):
        """Returns the value as a string. Not implemented on this base class."""
        raise NotImplementedError("value_str not implemented for this attribute class")

    def value_html(self):
        """Returns the value for display in HTML. By default this is the result of ``value_str``."""
        return self.value_str()

    def __str__(self):
        return f"{self.key}: {self.value_str()}"

    def type_str(self) -> str:
        """
        Describes the type of this attribute in words.

        The name of the class is used, with a trailing 'Attribute' removed
        and spaces inserted where implied by camel case.

        Returns:
            str: The type of this attribute as a string.
        """
        suffix = "Attribute"
        label = self.__class__.__name__
        if label.endswith(suffix):
            label = label[: -len(suffix)]

        # Matches the capital letters at which a new camel-case word begins.
        camel_case_boundary = r"((?<=[a-z])[A-Z]|(?<!\A)[A-Z](?=[a-z]))"
        return re.sub(camel_case_boundary, r" \1", label)
class ValueAttribute(Attribute):
    """
    Abstract base class for attributes which store their data in a single ``value`` field.
    """

    # Child classes need to give a 'value' field.

    class Meta:
        abstract = True

    def value_dict(self):
        """Returns the dictionary of the parent class with the value added under the key 'value'."""
        payload = super().value_dict()
        payload.update(value=self.value)
        return payload

    def value_str(self):
        """Returns the stored value unchanged."""
        return self.value
class CharAttribute(ValueAttribute):
    """An attribute for storing metadata as a string (of maximum length 1023 characters)."""

    value = models.CharField(max_length=1023)
class FloatAttribute(ValueAttribute):
    """An attribute for storing metadata as a floating point number."""

    value = models.FloatField()
class IntegerAttribute(ValueAttribute):
    """An attribute for storing metadata as an integer."""

    value = models.IntegerField()
class FilesizeAttribute(ValueAttribute):
    """An attribute for storing the size of a file in bytes."""

    value = models.PositiveBigIntegerField(
        help_text="The filesize of this item in bytes."
    )

    def value_str(self):
        """Returns the filesize as a human readable string (using ``humanize.naturalsize``)."""
        size_in_bytes = self.value
        return humanize.naturalsize(size_in_bytes)
class BooleanAttribute(ValueAttribute):
    """An attribute for storing metadata as a boolean."""

    value = models.BooleanField()
class DateTimeAttribute(ValueAttribute):
    """An attribute for storing metadata as a date and time."""

    value = models.DateTimeField()
class DateAttribute(ValueAttribute):
    """An attribute for storing metadata as a date."""

    value = models.DateField()
class URLAttribute(ValueAttribute):
    """An attribute for storing a URL (of maximum length 1023 characters)."""

    value = models.URLField(max_length=1023)

    def value_html(self):
        """Returns an HTML link which points to the URL and uses the URL as its text."""
        url = self.value
        return format_html("<a href='{}'>{}</a>", url, url)
class LatLongAttribute(Attribute):
    """An attribute for storing a geolocation (in decimal degrees)."""

    latitude = models.DecimalField(
        max_digits=12,
        decimal_places=9,
        help_text="The latitude of this location in decimal degrees.",
    )
    longitude = models.DecimalField(
        max_digits=12,
        decimal_places=9,
        help_text="The longitude of this location in decimal degrees.",
    )

    def value_dict(self):
        """Returns the dictionary of the parent class with the latitude and longitude added."""
        payload = super().value_dict()
        payload.update(latitude=self.latitude, longitude=self.longitude)
        return payload

    def value_str(self):
        """Returns the signed latitude directly followed by the signed longitude and a trailing slash."""
        return f"{self.latitude:+}{self.longitude:+}/"

    def value_html(self):
        """Returns an HTML link to this location on Google Maps with the result of ``value_str`` as its text."""
        link_template = "<a href='https://www.google.com/maps/place/{},{}'>{}</a>"
        return format_html(
            link_template, self.latitude, self.longitude, self.value_str()
        )