Coverage for crunch/django/app/storages.py: 100.00%
137 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-01 13:43 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-01 13:43 +0000
1import logging
2from typing import Dict, Union
3import toml
4import json
5from operator import mod
6from pathlib import Path
7from django.core.files import File
8from django.conf import settings
9from anytree import NodeMixin, RenderTree, PreOrderIter
10from django.core.files.storage import default_storage, DefaultStorage
11import os
12import time
13import shutil
14import datetime
class Directory:
    """Empty placeholder type; it defines no attributes or behaviour of its own."""
def get_storage_with_settings(storage_settings: Union[Dict, Path]) -> DefaultStorage:
    """
    Configures the Django config with settings if it has not been configured yet and then returns the default storage.

    Useful for connecting to a Django storage object outside of a webserver.
    If Django is already configured, ``storage_settings`` is not read at all.

    Args:
        storage_settings (Union[Dict,Path]): The settings with which to configure the Django config if it has not been configured already.
            Can be a dictionary with settings or can be a path to a toml or json file with the settings.

    Raises:
        IOError: Raised if passed a Path object which isn't a toml or json file.
        ValueError: Raised if the settings cannot be interpreted as a dictionary.

    Returns:
        DefaultStorage: The default storage object used by Django.
    """
    if not settings.configured:
        if isinstance(storage_settings, Path):
            # Pick the parser from the suffix *before* touching the file, so an
            # unsupported file type is reported as such rather than as whatever
            # error opening the file would have produced.
            suffix = storage_settings.suffix.lower()
            if suffix == ".toml":
                loader = toml.load
            elif suffix == ".json":
                loader = json.load
            else:
                raise IOError(f"Cannot find interpreter for {storage_settings}")

            # TOML and JSON are UTF-8 formats; do not depend on the locale's
            # default encoding, which differs between platforms.
            with open(storage_settings, encoding="utf-8") as storage_settings_file:
                storage_settings = loader(storage_settings_file)

        # Covers both a wrong argument type and a file whose top level is not a mapping.
        if not isinstance(storage_settings, dict):
            raise ValueError(
                f"Storage settings of type {type(storage_settings)} unable to be read. " +
                "Please pass a dictionary or a path to a toml or json file."
            )
        settings.configure(**storage_settings)
    return default_storage
class StorageDirectory(NodeMixin):
    """
    A directory node in a tree that mirrors the layout of a file storage.

    Tree behaviour (``parent``/``children``) comes from anytree's ``NodeMixin``.
    Children are either other ``StorageDirectory`` nodes or ``StorageFile`` nodes.
    """

    def __init__(
        self, *args, base_path: Union[str, Path], storage=None, parent=None, children=None, **kwargs
    ):
        """
        Args:
            base_path (Union[str,Path]): Path of this directory; always stored as a ``Path``.
            storage: Storage object this directory belongs to. ``StorageFile.url`` falls back
                to the default storage when this is falsy.
            parent: Parent node in the tree, or None for a root.
            children: Optional iterable of child nodes to attach.
        """
        super().__init__(*args, **kwargs)

        self.base_path = Path(base_path)
        self.storage = storage
        self.parent = parent
        if children:
            self.children = children

    def __str__(self):
        return str(self.base_path)

    def __repr__(self):
        return str(self)

    def files(self):
        """Returns a list of StorageFile objects for files in this immediate directory."""
        return [child for child in self.children if isinstance(child, StorageFile)]

    def short_str(self):
        """Returns this directory's path relative to its parent, or "" for a root node."""
        if self.parent:
            return str(self.base_path.relative_to(self.parent.base_path))
        return ""

    def render(self):
        """Returns a plain-text tree: the base path on the first line, then one line per descendant."""
        lines = [str(self.base_path)]
        for pre, _, node in RenderTree(self):
            # RenderTree yields the root first; it is already printed as the header line.
            if node is self:
                continue
            lines.append(f"{pre}{node.short_str()}".ljust(8))
        return "\n".join(lines) + "\n"

    def render_html(self):
        """
        Returns the tree as an HTML ``<div>``, with each file rendered as a link to its storage URL.

        This is best-effort: if anything fails while building the listing (for example the
        storage cannot produce a URL), the error is logged and a short failure ``<div>`` is
        returned instead of raising.
        """
        # Local import so this method is self-contained; `html` is in the standard library.
        from html import escape

        try:
            parts = ["<div>", f"{escape(str(self.base_path))}<br>\n"]
            for pre, _, node in RenderTree(self):
                if node is self:
                    continue

                # Names and URLs come from the storage listing; escape them so characters
                # such as ', < or & cannot break the markup or the single-quoted href.
                label = escape(node.short_str())
                if isinstance(node, StorageFile):
                    href = escape(str(node.url()))
                    parts.append(f"{pre}<a href='{href}'>{label}</a><br>\n")
                else:
                    parts.append(f"{pre}{label}<br>\n")
            parts.append("</div>")
            result = "".join(parts)
        except Exception:
            # Keep the page rendering, but leave a trace of what went wrong.
            logging.getLogger(__name__).exception(
                "Failed to read storage at %s", self.base_path
            )
            result = f"<div>Failed to read storage at {escape(str(self.base_path))}</div>"

        return result

    def directory_descendents(self, stop=None, maxlevel: Union[int, None] = None):
        """
        Does a pre order iteration of subdirectories.

        Iteration starts at this node, so this directory itself is included.
        ``stop`` and ``maxlevel`` are passed straight through to anytree's ``PreOrderIter``.
        """
        return PreOrderIter(
            self,
            filter_=lambda node: isinstance(node, StorageDirectory),
            stop=stop,
            maxlevel=maxlevel,
        )

    def file_descendents(self, stop=None, maxlevel: Union[int, None] = None):
        """
        Does a pre order iteration of subdirectories and returns the files in them.

        ``stop`` and ``maxlevel`` are passed straight through to anytree's ``PreOrderIter``.
        """
        return PreOrderIter(
            self,
            filter_=lambda node: isinstance(node, StorageFile),
            stop=stop,
            maxlevel=maxlevel,
        )
class StorageFile(NodeMixin):
    """A file node in a storage tree; its parent is the directory node that contains it."""

    def __init__(self, *args, filename, parent, **kwargs):
        """
        Args:
            filename: Name of the file within its parent directory.
            parent: The directory node this file is attached to.
        """
        super().__init__(*args, **kwargs)
        self.filename = filename
        self.parent = parent

    def __str__(self):
        # A file is displayed by its bare name, without the directory part.
        return self.filename

    def __repr__(self):
        return str(self)

    def short_str(self):
        """Returns the label used when rendering the tree (same as ``str(self)``)."""
        return str(self)

    def path(self) -> Path:
        """Returns the parent's base path joined with this file's name."""
        containing_dir = self.parent.base_path
        return Path(containing_dir, self.filename)

    def url(self):
        """Returns the storage URL for this file, using the default storage if the parent has none."""
        backend = self.parent.storage
        if not backend:
            backend = default_storage
        location = str(self.path())
        return backend.url(location)
def storage_walk(
    base_path="/", storage=None, parent=None
) -> StorageDirectory:
    """
    Builds a tree of the storage contents below ``base_path`` using Django's File Storage.

    Each directory becomes a ``StorageDirectory`` and each file a ``StorageFile``.
    Within a directory, subdirectory nodes are attached before file nodes.

    Adapted from https://gist.github.com/dvf/c103e697dab77c304d39d60cf279c500

    Args:
        base_path: Directory in the storage to start from.
        storage: Storage object to list; the Django default storage is used when None.
        parent: Node to attach the resulting directory to (used by the recursion).

    Returns:
        StorageDirectory: The node for ``base_path``.
    """
    backend = default_storage if storage is None else storage

    subfolder_names, file_names = backend.listdir(str(base_path))
    node = StorageDirectory(base_path=base_path, parent=parent, storage=backend)

    for name in subfolder_names:
        # On S3, we don't have subfolders, so exclude "."
        if name != ".":
            storage_walk(base_path=Path(base_path, name), storage=backend, parent=node)

    for name in file_names:
        # Constructing the node with a parent is what attaches it to the tree.
        StorageFile(filename=name, parent=node)

    return node
def default_dataset_path(project_slug, dataset_slug):
    """Returns the path ``crunch/<project_slug>/<dataset_slug>`` as a ``Path``."""
    return Path("crunch").joinpath(project_slug, dataset_slug)
def copy_recursive_to_storage(local_dir=".", base="/", storage=None):
    """
    Copies every file found recursively under ``local_dir`` into the storage below ``base``.

    Args:
        local_dir: Local directory to copy from, as a string or ``Path``.
        base: Directory in the storage that receives the files.
        storage: Storage object to write to; passed through to ``copy_to_storage``.
    """
    # The default "." (and any caller-supplied string) has no ``rglob``; normalise first.
    local_dir = Path(local_dir)
    copy_to_storage(local_dir.rglob("*"), local_dir=local_dir, base=base, storage=storage)
def copy_to_storage(paths, local_dir, base="/", storage=None):
    """
    Copies the given local files into the storage, keeping their layout relative to ``local_dir``.

    Directories in ``paths`` are skipped. A progress line is printed for each file copied.

    Args:
        paths: Iterable of local ``Path`` objects to copy.
        local_dir: Local directory the paths are made relative to.
        base: Directory in the storage that receives the files.
        storage: Storage object to write to; the Django default storage is used when None.
    """
    remote_root = Path(base)
    backend = default_storage if storage is None else storage

    for source_path in paths:
        if not source_path.is_dir():
            relative = source_path.relative_to(local_dir)
            destination = str(remote_root / relative)

            print(
                f"Copying '{source_path}' from local directory '{local_dir}' to storage at '{destination}'"
            )
            with source_path.open(mode="rb") as handle:
                backend._save(destination, File(handle, name=str(source_path)))
def copy_recursive_from_storage(base="/", local_dir=".", storage=None):
    """
    Copies everything below ``base`` in the storage into ``local_dir``, recreating the directory layout.

    Local directories are created as needed. A progress line is printed for each file copied.

    Args:
        base: Directory in the storage to copy from.
        local_dir: Local directory that receives the files.
        storage: Storage object to read from; the Django default storage is used when None.
    """
    remote_root = Path(base)
    destination_root = Path(local_dir)
    backend = default_storage if storage is None else storage

    tree = storage_walk(base_path=remote_root, storage=backend)

    for directory in tree.directory_descendents():
        remote_dir = Path(directory.base_path)
        local_target = destination_root / remote_dir.relative_to(remote_root)
        local_target.mkdir(exist_ok=True, parents=True)

        for entry in directory.files():
            name = entry.filename
            print(
                f"Copying '{name}' in '{remote_dir}' from storage to '{local_target}'"
            )
            with backend.open(str(remote_dir / name), "rb") as source, \
                    open(local_target / name, "wb") as target:
                shutil.copyfileobj(source, target, length=1024)