Coverage for crunch/django/app/models.py: 97.79%

226 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-01 13:43 +0000

1from operator import mod 

2from typing import List 

3import re 

4from typing import Type 

5from django.db import models 

6from django_extensions.db.fields import AutoSlugField 

7from django.utils.text import slugify 

8from django.urls import reverse 

9from django.contrib.auth import get_user_model 

10from django.utils.html import format_html 

11from django.db.models import OuterRef, Subquery 

12from mptt.models import MPTTModel, TreeForeignKey 

13import humanize 

14from polymorphic.models import PolymorphicModel 

15from django_extensions.db.models import TimeStampedModel 

16from next_prev import next_in_order, prev_in_order 

17from polymorphic_tree.models import PolymorphicMPTTModel, PolymorphicTreeForeignKey 

18 

19 

20from . import enums, storages 

21 

# Look up the user model through Django's helper rather than importing a
# concrete class; `Status.site_user` below points its foreign key at this.
User = get_user_model()

23 

24 

def OptionalCharField(max_length=255, default="", blank=True, **kwargs):
    """Build a ``models.CharField`` that may be left empty.

    Args:
        max_length: Maximum number of characters. Defaults to 255.
        default: Value used when none is supplied. Defaults to the empty string.
        blank: Whether the field may be left blank. Defaults to True.
        **kwargs: Any further keyword arguments, passed straight to ``models.CharField``.

    Returns:
        The configured ``models.CharField`` instance.
    """
    field_options = {"max_length": max_length, "default": default, "blank": blank}
    return models.CharField(**field_options, **kwargs)

29 

30 

class NextPrevMixin(models.Model):
    """Abstract model mixin adding next/previous navigation and an admin URL helper."""

    class Meta:
        abstract = True

    def next_in_order(self, **kwargs):
        """Delegate to the module-level ``next_in_order`` imported from ``next_prev``.

        All keyword arguments are forwarded unchanged.
        """
        following = next_in_order(self, **kwargs)
        return following

    def prev_in_order(self, **kwargs):
        """Delegate to the module-level ``prev_in_order`` imported from ``next_prev``.

        All keyword arguments are forwarded unchanged.
        """
        preceding = prev_in_order(self, **kwargs)
        return preceding

    def get_admin_url(self):
        """Return the URL of this object's change page in the admin site."""
        opts = self._meta
        route_name = f"admin:{opts.app_label}_{opts.model_name}_change"
        return reverse(route_name, args=(self.pk,))

46 

47 

class Item(NextPrevMixin, TimeStampedModel, PolymorphicMPTTModel):
    """
    A general class of object which can be placed in a hierarchical tree.

    Each item can be viewed online with a details page and each item can be given any number of attributes to store metadata.
    """

    parent = PolymorphicTreeForeignKey(
        "self",
        blank=True,
        null=True,
        default=None,
        related_name="children",
        on_delete=models.SET_DEFAULT,
    )
    name = models.CharField(max_length=1023, unique=True)
    description = models.CharField(
        max_length=1023,
        default="",
        blank=True,
        help_text="A short description in a sentence or more of this item.",
    )
    details = models.TextField(
        default="",
        blank=True,
        help_text="A detailed description of this item (written in Markdown).",
    )
    slug = AutoSlugField(populate_from="name", unique=True, max_length=255)
    # TODO Add tags

    def slugify_function(self, content):
        """Build the slug for this item, prefixed by the parent's slug when there is one.

        NOTE(review): presumably picked up by ``AutoSlugField`` as a per-model
        slugify hook -- confirm against the django-extensions documentation.

        Args:
            content: The text to slugify (``reslugify_descendants`` passes the item name).

        Returns:
            str: ``"<parent slug>:<slug>"`` if the item has a parent, otherwise ``"<slug>"``.
        """
        slug = slugify(content)
        if self.parent:
            return f"{self.parent.slug}:{slug}"
        return slug

    class Meta(PolymorphicMPTTModel.Meta):
        unique_together = ("parent", "slug")
        ordering = ("created", "pk")

    def __str__(self):
        return self.name

    def get_absolute_url(self):
        """Return the URL of the detail page for this item."""
        return reverse("crunch:item-detail", kwargs={"slug": self.slug})

    def items(self):
        """Return the direct children of this item in the tree."""
        return self.get_children()

    def descendant_attributes(
        self, attribute_type: Type = None, include_self: bool = True
    ) -> models.QuerySet:
        """Returns a queryset with all the attributes of the descendants of this item.

        Args:
            attribute_type (Type, optional): The type of the attribute to filter for. If `None` then it uses the `Attribute` class.
            include_self (bool, optional): Whether or not to include attributes of this item. Defaults to True.

        Returns:
            models.QuerySet: A queryset of attributes of items descended from this item.
        """
        attribute_type = attribute_type or Attribute
        return attribute_type.objects.filter(
            item__in=self.get_descendants(include_self=include_self)
        )

    def descendant_total_filesize(self) -> int:
        """
        Sums all the filesize attributes for this item and its descendants.

        Returns:
            int: The total sum of the filesize attributes of this item and all its descendants.
                If there are no filesize attributes then it returns None.
        """
        filesize_attributes = self.descendant_attributes(
            attribute_type=FilesizeAttribute, include_self=True
        )

        # Sum() over an empty queryset already yields None, so no separate
        # emptiness check is needed. The earlier truth-test loaded every
        # matching row into memory just to find out whether any existed.
        return filesize_attributes.aggregate(models.Sum("value"))["value__sum"]

    def descendant_total_filesize_readable(self) -> str:
        """
        Sums all the filesize attributes for this item and its descendants and converts it to a human readable string.

        Returns:
            str: The total sum of the filesize attributes of this item and all its descendants in a human readable string.
                If there are no filesize attributes then it returns 'None'.
        """
        descendant_total_filesize = self.descendant_total_filesize()

        # Compare against None explicitly: a genuine total of 0 bytes is a
        # valid size and must not be reported as missing.
        if descendant_total_filesize is not None:
            return humanize.naturalsize(descendant_total_filesize)

        return "None"

    def map(self):
        """Return the result of ``item_map`` from the sibling ``mapping`` module for this item."""
        # Imported inside the method, as in the original code.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from .mapping import item_map

        return item_map(self)

    def descendant_latlongattributes(self):
        """Return the ``LatLongAttribute`` objects of this item and all its descendants."""
        return self.descendant_attributes(
            attribute_type=LatLongAttribute, include_self=True
        )

    def has_descendant_latlongattributes(self):
        """Return True if this item or any descendant has a ``LatLongAttribute``."""
        # exists() asks the database only whether a row is present instead of counting all rows.
        return self.descendant_latlongattributes().exists()

    def reslugify_descendants(self):
        """Recompute and save the slug of this item and every descendant."""
        for item in self.get_descendants(include_self=True):
            item.slug = item.slugify_function(item.name)
            item.save()

163 

164 

class Project(Item):
    """
    A top-level item grouping the datasets that share one workflow.

    Projects ought not to have parents in the tree structure of items.
    """

    workflow = models.TextField(
        default="",
        blank=True,
        help_text="URL to snakemake repository/shell script or its content.",
    )
    # More workflow languages need to be supported.
    # TODO assert parent is none

    def get_absolute_url(self):
        """Return the URL of the detail page for this project."""
        url_kwargs = dict(slug=self.slug)
        return reverse("crunch:project-detail", kwargs=url_kwargs)

    def unprocessed_datasets(self) -> models.QuerySet:
        """
        Datasets among this project's children that are neither complete nor locked.
        """
        candidates = Dataset.unprocessed()
        return candidates.filter(id__in=self.items())

    def completed_datasets(self) -> models.QuerySet:
        """
        Datasets among this project's children that are completed.
        """
        candidates = Dataset.completed()
        return candidates.filter(id__in=self.items())

    def running_datasets(self) -> models.QuerySet:
        """
        Datasets among this project's children that are running.
        """
        candidates = Dataset.running()
        return candidates.filter(id__in=self.items())

    def failed_datasets(self) -> models.QuerySet:
        """
        Datasets among this project's children that have failed.
        """
        candidates = Dataset.failed()
        return candidates.filter(id__in=self.items())

    def next_unprocessed_dataset(self) -> "Dataset":
        """Return the first unprocessed dataset of this project (``.first()`` of the queryset)."""
        pending = self.unprocessed_datasets()
        return pending.first()

208 

209 

class Dataset(Item):
    """
    An item that should be run once in a workflow.

    The parent of a dataset should be its project.
    """

    base_file_path = models.CharField(max_length=4096, default="", blank=True)
    locked = models.BooleanField(
        default=False,
        help_text="If the dataset is locked then it will not show up in the loop of available datasets.",
    )

    def save(self, *args, **kwargs):
        """Save the dataset, filling in ``base_file_path`` if it is empty.

        Raises:
            AssertionError: If the parent is not a ``Project``.
        """
        assert isinstance(self.parent, Project)

        if not self.base_file_path:
            # NOTE(review): on a first save the slug may not have been populated
            # yet when this line runs -- confirm how AutoSlugField orders this.
            self.base_file_path = storages.default_dataset_path(self.parent.slug, self.slug)
        return super().save(*args, **kwargs)

    def get_absolute_url(self) -> str:
        """Return the dataset URL, built by appending to the parent project's URL."""
        return f"{self.parent.get_absolute_url()}datasets/{self.slug}"

    @classmethod
    def completed_ids(cls) -> models.QuerySet:
        """
        Returns the ids of all datasets which have a status with stage UPLOAD and state SUCCESS.

        The result is a lazy ``values_list`` queryset rather than a concrete list.
        """
        return Status.completed().values_list("dataset__id", flat=True)

    @classmethod
    def completed(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all datasets which have a status with stage UPLOAD and state SUCCESS.
        """
        return cls.objects.filter(id__in=cls.completed_ids())

    @classmethod
    def incomplete(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all datasets that are not complete (including unprocessed, running and failed datasets).
        """
        return cls.objects.exclude(id__in=cls.completed_ids())

    @classmethod
    def unprocessed(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all incomplete datasets that are not locked.
        """
        return cls.incomplete().filter(locked=False)

    @classmethod
    def inprocess(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all incomplete datasets that are locked (including running and failed datasets).
        """
        return cls.incomplete().filter(locked=True)

    @classmethod
    def has_status(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all datasets with at least one status.
        """
        statuses = Status.objects.filter(dataset=OuterRef("pk"))
        return cls.objects.filter(models.Exists(statuses))

    @classmethod
    def failed(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all incomplete locked datasets where the latest status has a state of ``enums.State.FAIL``.
        """
        # Newest first, so the [:1] slice below picks the most recent status.
        newest_statuses = Status.objects.filter(dataset=OuterRef("pk")).order_by(
            "-created"
        )
        annotated = cls.inprocess().annotate(
            newest_status_state=Subquery(newest_statuses.values("state")[:1])
        )
        return annotated.filter(newest_status_state=enums.State.FAIL)

    @classmethod
    def running(cls) -> models.QuerySet:
        """
        Returns a QuerySet of all incomplete locked datasets where the latest status does not have a state of ``enums.State.FAIL``.
        """
        return cls.inprocess().exclude(id__in=cls.failed())

    @classmethod
    def next_unprocessed(cls) -> "Dataset":
        """Return the first unprocessed dataset (``.first()`` of the queryset)."""
        return cls.unprocessed().first()

    def files(self):
        """Return the result of ``storages.storage_walk`` for this dataset's base file path."""
        return storages.storage_walk(self.base_file_path)

    def files_html(self):
        """Return an HTML rendering of this dataset's files.

        On any error a short failure paragraph is returned instead, so a
        storage problem does not raise out of this method.
        """
        try:
            return self.files().render_html()
        except Exception:
            # format_html escapes the dataset name, so it cannot inject markup
            # into the page.
            return format_html("<p>Failed to read files in dataset {}</p>", self)

308 

309 

class Status(NextPrevMixin, TimeStampedModel):
    """A record of a dataset reaching a stage with a given state, plus diagnostics about the agent."""

    dataset = models.ForeignKey(
        Dataset, on_delete=models.CASCADE, related_name="statuses"
    )
    site_user = models.ForeignKey(
        User, on_delete=models.SET_DEFAULT, default=None, blank=True, null=True
    )
    stage = models.IntegerField(choices=enums.Stage.choices)
    state = models.IntegerField(choices=enums.State.choices)
    note = models.TextField(default="", blank=True)

    # Diagnostic info
    agent_user = OptionalCharField(
        help_text="The name of the user running the agent (see https://docs.python.org/3/library/getpass.html)."
    )
    version = OptionalCharField(
        help_text="The django-crunch version number of the agent."
    )
    revision = OptionalCharField(
        help_text="The django-crunch git revision hash of the agent."
    )
    # terminal = OptionalCharField(help_text="the tty or pseudo-tty associated with the agent user (see https://psutil.readthedocs.io/en/latest/).")
    system = OptionalCharField(
        help_text="Returns the system/OS name, such as 'Linux', 'Darwin', 'Java', 'Windows' (see https://docs.python.org/3/library/platform.html)."
    )
    system_release = OptionalCharField(
        help_text="Returns the system’s release, e.g. '2.2.0' or 'NT' (see https://docs.python.org/3/library/platform.html)."
    )
    system_version = OptionalCharField(
        help_text="Returns the system’s release version, e.g. '#3 on degas' (see https://docs.python.org/3/library/platform.html)."
    )
    machine = OptionalCharField(
        help_text="Returns the machine type, e.g. 'i386' (see https://docs.python.org/3/library/platform.html)."
    )
    hostname = OptionalCharField(
        help_text="The hostname of the machine where the agent was running (see https://docs.python.org/3/library/socket.html)."
    )
    ip_address = OptionalCharField(
        help_text="The hostname in IPv4 address format (see https://docs.python.org/3/library/socket.html)."
    )
    mac_address = OptionalCharField(
        help_text="The hardware address (see https://docs.python.org/3/library/uuid.html)."
    )

    # Optional resource figures; each may be left unset (NULL).
    memory_total = models.BigIntegerField(
        default=None,
        blank=True,
        null=True,
        help_text="See https://psutil.readthedocs.io/en/latest/",
    )
    memory_free = models.BigIntegerField(
        default=None,
        blank=True,
        null=True,
        help_text="See https://psutil.readthedocs.io/en/latest/",
    )
    disk_total = models.BigIntegerField(
        default=None,
        blank=True,
        null=True,
        help_text="See https://psutil.readthedocs.io/en/latest/",
    )
    disk_free = models.BigIntegerField(
        default=None,
        blank=True,
        null=True,
        help_text="See https://psutil.readthedocs.io/en/latest/",
    )

    class Meta:
        verbose_name_plural = "statuses"

    def __str__(self):
        return "{}: {} {}".format(
            self.dataset, self.get_stage_display(), self.get_state_display()
        )

    def save(self, *args, **kwargs):
        """Save this status, locking its dataset first if it is not locked yet.

        Raises:
            AssertionError: If ``dataset`` is not a ``Dataset`` instance.
        """
        target = self.dataset
        assert isinstance(target, Dataset)

        # Recording any status marks the dataset as taken.
        if not target.locked:
            target.locked = True
            target.save()

        super().save(*args, **kwargs)

    @classmethod
    def completed(cls):
        """Return all statuses with stage UPLOAD and state SUCCESS."""
        criteria = dict(stage=enums.Stage.UPLOAD, state=enums.State.SUCCESS)
        return Status.objects.filter(**criteria)

398 

399 

class Attribute(NextPrevMixin, TimeStampedModel, PolymorphicModel):
    """A keyed piece of metadata attached to an item; subclasses supply the stored value."""

    item = models.ForeignKey(Item, on_delete=models.CASCADE, related_name="attributes")
    key = models.CharField(max_length=255)

    def value_dict(self):
        """Return a dictionary describing this attribute; the base class contributes only ``key``."""
        return {"key": self.key}

    def value_str(self):
        """Return the value as a string. Not implemented on the base class.

        Raises:
            NotImplementedError: Always, on this base class.
        """
        raise NotImplementedError("value_str not implemented for this attribute class")

    def value_html(self):
        """Return the value for display in HTML; by default whatever ``value_str`` returns."""
        return self.value_str()

    def __str__(self):
        return "{}: {}".format(self.key, self.value_str())

    def type_str(self) -> str:
        """
        Describe this type of attribute as a string.

        The class name is taken, a trailing "Attribute" is dropped, and spaces are inserted where camel case implies a word break.

        Returns:
            str: The type of this attribute as a string.
        """
        suffix = "Attribute"
        label = self.__class__.__name__
        if label.endswith(suffix):
            label = label[: -len(suffix)]

        camel_case_boundary = r"((?<=[a-z])[A-Z]|(?<!\A)[A-Z](?=[a-z]))"
        return re.sub(camel_case_boundary, r" \1", label)

430 

431 

class ValueAttribute(Attribute):
    """Abstract base for attributes that keep their data in a single ``value`` field."""

    # Child classes need to give a 'value' field.

    class Meta:
        abstract = True

    def value_dict(self):
        """Return the parent's dictionary extended with a ``value`` entry."""
        return {**super().value_dict(), "value": self.value}

    def value_str(self):
        """Return the stored value unchanged (no string conversion is applied here)."""
        stored = self.value
        return stored

445 

446 

class CharAttribute(ValueAttribute):
    """Stores metadata as a string of at most 1023 characters."""

    value = models.CharField(max_length=1023)

450 

451 

class FloatAttribute(ValueAttribute):
    """Stores metadata in a ``FloatField``."""

    value = models.FloatField()

454 

455 

class IntegerAttribute(ValueAttribute):
    """Stores metadata in an ``IntegerField``."""

    value = models.IntegerField()

458 

459 

class FilesizeAttribute(ValueAttribute):
    """Stores a file size in bytes and displays it in human readable form."""

    value = models.PositiveBigIntegerField(
        help_text="The filesize of this item in bytes."
    )

    def value_str(self):
        """Return the size formatted by ``humanize.naturalsize``."""
        size_in_bytes = self.value
        return humanize.naturalsize(size_in_bytes)

467 

468 

class BooleanAttribute(ValueAttribute):
    """Stores metadata in a ``BooleanField``."""

    value = models.BooleanField()

471 

472 

class DateTimeAttribute(ValueAttribute):
    """Stores metadata in a ``DateTimeField``."""

    value = models.DateTimeField()

475 

476 

class DateAttribute(ValueAttribute):
    """Stores metadata in a ``DateField``."""

    value = models.DateField()

479 

480 

class URLAttribute(ValueAttribute):
    """Stores a URL of at most 1023 characters and renders it as a link."""

    value = models.URLField(max_length=1023)

    def value_html(self):
        """Return an anchor tag whose target and text are both the stored URL."""
        link = self.value
        return format_html("<a href='{}'>{}</a>", link, link)

490 

491 

class LatLongAttribute(Attribute):
    """Stores a geolocation as a latitude and a longitude in decimal degrees."""

    latitude = models.DecimalField(
        max_digits=12,
        decimal_places=9,
        help_text="The latitude of this location in decimal degrees.",
    )
    longitude = models.DecimalField(
        max_digits=12,
        decimal_places=9,
        help_text="The longitude of this location in decimal degrees.",
    )

    def value_dict(self):
        """Return the parent's dictionary extended with ``latitude`` and ``longitude``."""
        described = super().value_dict()
        described.update(latitude=self.latitude, longitude=self.longitude)
        return described

    def value_str(self):
        """Return the signed latitude followed directly by the signed longitude and a trailing slash.

        NOTE(review): the layout resembles an ISO 6709 coordinate string -- confirm if that is the intent.
        """
        return "{:+}{:+}/".format(self.latitude, self.longitude)

    def value_html(self):
        """Return a Google Maps link to this location, labelled with ``value_str()``."""
        lat, lng = self.latitude, self.longitude
        label = self.value_str()
        return format_html(
            "<a href='https://www.google.com/maps/place/{},{}'>{}</a>",
            lat,
            lng,
            label,
        )

518 self.longitude, 

519 self.value_str(), 

520 )