Coverage for crunch/django/app/storages.py: 100.00%

137 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-01 13:43 +0000

1import logging 

2from typing import Dict, Union 

3import toml 

4import json 

5from operator import mod 

6from pathlib import Path 

7from django.core.files import File 

8from django.conf import settings 

9from anytree import NodeMixin, RenderTree, PreOrderIter 

10from django.core.files.storage import default_storage, DefaultStorage 

11import os 

12import time 

13import shutil 

14import datetime 

15 

16 

class Directory:
    """Empty placeholder class; it declares no attributes or methods of its own."""

19 

20 

def get_storage_with_settings(storage_settings:Union[Dict,Path]) -> DefaultStorage:
    """
    Configures the Django config with settings if it has not been configured yet and then returns the default storage.

    Useful for connecting to a Django storage object outside of a webserver.

    If Django's settings are already configured, ``storage_settings`` is ignored entirely
    (it is neither read nor validated) and the default storage is returned as-is.

    Args:
        storage_settings (Union[Dict,Path]): The settings with which to configure the Django config if it has not been configured already.
            Can be a dictionary with settings or can be a path to a toml or json file with the settings.
            Settings files are read as UTF-8.

    Raises:
        IOError: Raised if passed a Path object which isn't a toml or json file.
        ValueError: Raised if the settings cannot be interpreted as a dictionary.

    Returns:
        DefaultStorage: The default storage object used by Django.
    """
    if not settings.configured:
        if isinstance(storage_settings, Path):
            # Both TOML and JSON are defined as UTF-8 formats, so decode explicitly
            # instead of relying on the platform's locale encoding.
            with open(storage_settings, encoding="utf-8") as storage_settings_file:
                suffix = storage_settings.suffix.lower()
                if suffix == ".toml":
                    storage_settings = toml.load(storage_settings_file)
                elif suffix == ".json":
                    storage_settings = json.load(storage_settings_file)
                else:
                    raise IOError(f"Cannot find interpreter for {storage_settings}")

        # Covers both a wrong argument type and a settings file whose top level is not a mapping.
        if not isinstance(storage_settings, dict):
            raise ValueError(
                f"Storage settings of type {type(storage_settings)} unable to be read. " +
                "Please pass a dictionary or a path to a toml or json file."
            )
        settings.configure(**storage_settings)
    return default_storage

56 

57 

class StorageDirectory(NodeMixin):
    """
    A directory node in a tree describing the contents of a storage.

    The tree relationships (``parent`` / ``children``) come from anytree's ``NodeMixin``.
    Children may be other ``StorageDirectory`` objects or ``StorageFile`` objects.
    """

    def __init__(
        self, *args, base_path: Union[str,Path], storage=None, parent=None, children=None, **kwargs
    ):
        """
        Args:
            base_path (Union[str,Path]): The path of this directory; stored as a ``Path``.
            storage: The storage object this directory belongs to. Stored as given (may be None).
            parent: The parent node in the tree, or None for a root.
            children: Optional iterable of child nodes. Only assigned if truthy.
        """
        super().__init__(*args, **kwargs)

        self.base_path = Path(base_path)
        self.storage = storage
        self.parent = parent
        if children:
            self.children = children

    def __str__(self):
        return str(self.base_path)

    def __repr__(self):
        return str(self)

    def files(self):
        """Returns a list of StorageFile objects for files in this immediate directory."""
        return [child for child in self.children if isinstance(child, StorageFile)]

    def short_str(self):
        """Returns this directory's path relative to its parent, or an empty string for a root."""
        if self.parent:
            return str(self.base_path.relative_to(self.parent.base_path))
        return ""

    def render(self):
        """Returns a plain-text tree: the base path followed by one line per descendant."""
        lines = [f"{self.base_path}\n"]
        for pre, _, node in RenderTree(self):
            # RenderTree yields the root itself first; it is already in the header line.
            if node is self:
                continue
            lines.append(f"{pre}{node.short_str()}".ljust(8) + "\n")
        return "".join(lines)

    def render_html(self):
        """
        Returns an HTML ``<div>`` listing the tree, with each file rendered as a link to its URL.

        Names and URLs are HTML-escaped before being interpolated. If anything goes wrong
        while building the listing, the error is logged and a short failure ``<div>`` is
        returned instead of raising.
        """
        # Imported here so this class does not depend on an extra module-level import.
        from html import escape

        try:
            parts = ["<div>", f"{escape(str(self.base_path))}<br>\n"]
            for pre, _, node in RenderTree(self):
                if node is self:
                    continue

                label = escape(node.short_str())
                if isinstance(node, StorageFile):
                    # quote=True (the default) also escapes the single quotes delimiting the attribute.
                    href = escape(str(node.url()), quote=True)
                    parts.append(f"{pre}<a href='{href}'>{label}</a><br>\n")
                else:
                    parts.append(f"{pre}{label}<br>\n")
            parts.append("</div>")
            result = "".join(parts)
        except Exception:
            # Deliberately best-effort: keep returning a fallback, but leave a trace of the cause.
            logging.getLogger(__name__).exception(
                "Failed to render storage at %s", self.base_path
            )
            result = f"<div>Failed to read storage at {escape(str(self.base_path))}</div>"

        return result

    def directory_descendents(self, stop=None, maxlevel: Union[int, None] = None):
        """
        Does a pre order iteration of subdirectories.

        The iteration starts at this node, so this directory itself is included.
        ``stop`` and ``maxlevel`` are forwarded unchanged to anytree's ``PreOrderIter``.
        """
        return PreOrderIter(
            self,
            filter_=lambda node: isinstance(node, StorageDirectory),
            stop=stop,
            maxlevel=maxlevel,
        )

    def file_descendents(self, stop=None, maxlevel: Union[int, None] = None):
        """
        Does a pre order iteration of subdirectories and returns the files in them.

        ``stop`` and ``maxlevel`` are forwarded unchanged to anytree's ``PreOrderIter``.
        """
        return PreOrderIter(
            self,
            filter_=lambda node: isinstance(node, StorageFile),
            stop=stop,
            maxlevel=maxlevel,
        )

129 

130 

class StorageFile(NodeMixin):
    """A file (leaf) node in a storage tree; its parent node supplies the directory path."""

    def __init__(self, *args, filename, parent, **kwargs):
        """
        Args:
            filename: The name of the file within its parent directory.
            parent: The node this file is attached to.
        """
        super().__init__(*args, **kwargs)
        self.parent = parent
        self.filename = filename

    def __str__(self):
        return self.filename

    def __repr__(self):
        return str(self)

    def short_str(self):
        """Returns the same text as ``str(self)``, i.e. the filename."""
        return str(self)

    def path(self) -> Path:
        """Returns the parent's ``base_path`` joined with this file's name."""
        directory = Path(self.parent.base_path)
        return directory.joinpath(self.filename)

    def url(self):
        """Returns the URL for this file from the parent's storage, or the default storage if unset."""
        backend = self.parent.storage
        if not backend:
            backend = default_storage
        location = str(self.path())
        return backend.url(location)

152 

153 

def storage_walk(
    base_path="/", storage=None, parent=None
) -> StorageDirectory:
    """
    Recursively walks a folder, using Django's File Storage.

    Builds a tree of StorageDirectory / StorageFile nodes rooted at ``base_path`` and
    returns the root directory node. Falls back to the default storage when ``storage`` is None.

    Adapted from https://gist.github.com/dvf/c103e697dab77c304d39d60cf279c500
    """
    backend = default_storage if storage is None else storage

    subfolder_names, file_names = backend.listdir(str(base_path))

    root = StorageDirectory(base_path=base_path, parent=parent, storage=backend)

    for name in subfolder_names:
        # On S3, we don't have subfolders, so exclude "."
        if name != ".":
            # The recursive call attaches the new subtree to ``root`` through ``parent``.
            storage_walk(
                base_path=Path(base_path, name),
                storage=backend,
                parent=root,
            )

    for name in file_names:
        StorageFile(filename=name, parent=root)

    return root

185 

186 

def default_dataset_path(project_slug, dataset_slug):
    """Returns the path ``crunch/<project_slug>/<dataset_slug>`` as a ``Path``."""
    segments = ("crunch", project_slug, dataset_slug)
    return Path(*segments)

189 

190 

def copy_recursive_to_storage(local_dir=".", base="/", storage=None):
    """
    Copies every file found recursively under ``local_dir`` to the storage.

    Args:
        local_dir: The local directory to copy from. May be a string or a ``Path``.
        base: The path in the storage under which the files are placed.
        storage: The storage to copy to; passed through to ``copy_to_storage``.
    """
    # The default "." (and any other string) has no ``rglob``; normalise to a Path first.
    local_dir = Path(local_dir)
    copy_to_storage(local_dir.rglob("*"), local_dir=local_dir, base=base, storage=storage)

193 

194 

def copy_to_storage(paths, local_dir, base="/", storage=None):
    """
    Copies the given local files into the storage, keeping their layout relative to ``local_dir``.

    Args:
        paths: Iterable of local path objects. Entries that are directories are skipped.
        local_dir: The local directory that each path is made relative to.
        base: The path in the storage under which the relative paths are placed.
        storage: The storage to write to. Uses the default storage if None.
    """
    remote_root = Path(base)
    backend = default_storage if storage is None else storage

    files_only = (candidate for candidate in paths if not candidate.is_dir())
    for source in files_only:
        relative = source.relative_to(local_dir)
        remote_path = str(remote_root / relative)

        print(
            f"Copying '{source}' from local directory '{local_dir}' to storage at '{remote_path}'"
        )
        with source.open(mode="rb") as handle:
            wrapped = File(handle, name=str(source))
            backend._save(remote_path, wrapped)

212 

213 

def copy_recursive_from_storage(base="/", local_dir=".", storage=None):
    """
    Copies everything under ``base`` in the storage into ``local_dir``, recreating the directory layout.

    Args:
        base: The path in the storage to copy from.
        local_dir: The local directory to copy into; subdirectories are created as needed.
        storage: The storage to read from. Uses the default storage if None.
    """
    remote_root = Path(base)
    local_root = Path(local_dir)
    backend = default_storage if storage is None else storage

    tree = storage_walk(base_path=remote_root, storage=backend)

    for directory in tree.directory_descendents():
        remote_dir = Path(directory.base_path)
        target_dir = local_root / remote_dir.relative_to(remote_root)
        target_dir.mkdir(exist_ok=True, parents=True)

        for entry in directory.files():
            name = entry.filename
            print(
                f"Copying '{name}' in '{remote_dir}' from storage to '{target_dir}'"
            )
            remote_file = str(remote_dir / name)
            with backend.open(remote_file, "rb") as source, open(target_dir / name, "wb") as target:
                shutil.copyfileobj(source, target, length=1024)