Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import sys 

2from pathlib import Path 

3from appdirs import user_cache_dir 

4from typing import Union 

5import urllib.request 

6from datetime import datetime, timedelta 

7 

8 

# Three-letter quarter labels accepted by the download helpers.
# The order matters: the tuple is indexed by position when a date is mapped to a quarter.
ACCEPTED_QUARTERS = (
    "mar",
    "jun",
    "sep",
    "dec",
)

10 

11 

class DownloadError(Exception):
    """Raised when a file cannot be downloaded."""

14 

15 

def get_cached_path(filename: str) -> Path:
    """
    Builds the location of ``filename`` inside the user's "ausdex" cache directory.

    The cache directory (including any missing parents) is created as a side effect.
    The returned file itself is not created and may or may not exist.

    Args:
        filename (str): The name of the file within the cache directory.

    Returns:
        Path: The path to ``filename`` in the cache directory.
    """
    ausdex_cache = Path(user_cache_dir("ausdex"))
    ausdex_cache.mkdir(parents=True, exist_ok=True)
    return ausdex_cache.joinpath(filename)

25 

26 

def cached_download(url: str, local_path: Union[str, Path], force: bool = False, verbose: bool = False) -> None:
    """
    Downloads a file if a local file does not already exist.

    The download is first written to a ``.part`` file next to ``local_path`` and only moved into
    place once the transfer has finished. An interrupted transfer therefore never leaves a
    truncated, non-empty file at ``local_path`` that a later call would mistake for a valid copy.

    Args:
        url (str): The url of the file to download.
        local_path (str, Path): The local path of where the file should be.
            If this file isn't there or the file size is zero then this function downloads it to this location.
        force (bool): Whether or not the file should be forced to download again even if present in the local path.
            Default False.
        verbose (bool): Currently unused by this function; accepted so callers that pass it keep working.
            Default False.

    Raises:
        DownloadError: Raises an exception if it cannot download the file.
            The underlying error is chained as the cause.
        IOError: Raises an exception if the file does not exist or is empty after downloading.
    """
    local_path = Path(local_path)
    if force or not local_path.exists() or local_path.stat().st_size == 0:
        # Sibling path, so that moving the finished download into place stays on one filesystem.
        partial_path = local_path.parent / (local_path.name + ".part")
        try:
            urllib.request.urlretrieve(url, partial_path)
            partial_path.replace(local_path)
        except Exception as err:
            # Best-effort cleanup: the partial file may never have been created.
            try:
                partial_path.unlink()
            except OSError:
                pass
            raise DownloadError(f"Error downloading {url}") from err

    if not local_path.exists() or local_path.stat().st_size == 0:
        raise IOError(f"Error reading {local_path}")

51 

52 

def cached_download_abs(
    id: str,
    quarter: str,
    year: Union[int, str],
    extension: str,
    local_path: Union[Path, str, None] = None,
    force: bool = False,
) -> Path:
    """
    Downloads a file from the ABS if a local file does not already exist.

    Args:
        id (str): The ABS id for the datafile. For Australian Consumer Price Index the ID is 640101.
        quarter (str): The quarter of the file in question. One of "mar", "jun", "sep", or "dec".
            Only the first three characters are used and case is ignored.
        year (str, int): The year for the file in question. Strings are converted to an integer.
        extension (str): The extension of the file in question.
            NOTE: this value is currently ignored. The extension is chosen from the year and quarter:
            "xlsx" from the December quarter of 2021 onwards and "xls" before that.
        local_path (Path, str, optional): The path to where the file should be downloaded.
            If None, then it is downloaded in the user's cache directory.
        force (bool): Whether or not the file should be forced to download again even if present in the local path.
            Default False.

    Raises:
        ValueError: If the value for `quarter` cannot be understood or `year` is not a valid integer.
        DownloadError: Raises an exception if it cannot download the file.
        IOError: Raises an exception if the file does not exist or is empty after downloading.

    Returns:
        Path: The path to the downloaded ABS datafile.
    """
    quarter = quarter.lower()[:3]
    if quarter not in ACCEPTED_QUARTERS:
        raise ValueError(f"Cannot understand quarter {quarter}.")

    # `year` may be given as a string; the comparisons below need an integer.
    year = int(year)

    if (year == 2021 and quarter == "dec") or year > 2021:
        extension = "xlsx"
    else:
        extension = "xls"

    # The directory name on the ABS site gained a "-quarter-" infix for some releases.
    # NOTE(review): "sep" 2022 deliberately(?) falls through to the short form - confirm against the ABS site.
    if (year == 2022 and quarter in ("jun", "dec")) or year > 2022:
        online_dir = f"{quarter}-quarter-{year}"
    else:
        online_dir = f"{quarter}-{year}"

    local_path = Path(local_path or get_cached_path(f"{id}-{quarter}-{year}.{extension}"))

    url = f"https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/consumer-price-index-australia/{online_dir}/{id}.{extension}"
    cached_download(url, local_path, force=force)

    return local_path

106 

107 

def cached_download_abs_excel(
    id: str, quarter: str, year: Union[str, int], local_path: Union[Path, str, None] = None, force: bool = False
) -> Path:
    """
    Gets an Excel file from the Australian Bureau of Statistics.

    This is a thin wrapper around `cached_download_abs` which always requests the `xlsx` extension.
    It performs no fallback itself; any exception raised by `cached_download_abs` propagates to the caller.

    Args:
        id (str): The ABS id for the datafile. For Australian Consumer Price Index the ID is 640101.
        quarter (str): The quarter of the file in question. One of "mar", "jun", "sep", or "dec".
        year (str, int): The year for the file in question.
        local_path (Path, str, optional): The path to where the file should be downloaded.
            If None, then it is downloaded in the user's cache directory.
        force (bool): Whether or not the file should be forced to download again even if present in the local path.
            Default False.

    Raises:
        ValueError: Raises this error if the quarter cannot be understood.

    Returns:
        Path: The path to the cached ABS datafile
    """
    return cached_download_abs(
        id=id,
        quarter=quarter,
        year=year,
        extension="xlsx",
        local_path=local_path,
        force=force,
    )

136 

137 

def cached_download_abs_excel_by_date(
    id: str, date: Union[datetime, None] = None, local_path: Union[Path, str, None] = None, force: bool = False
) -> Union[Path, None]:
    """
    Gets a datafile from the Australian Bureau of Statistics before a specific date.

    Starting from the quarter that contains `date`, it tries each quarter in turn, going backwards
    in time, until a file can be downloaded (or found in the cache). Each quarter is tried once.
    A warning is written to stderr for every quarter that is not available.

    Quarters are labelled by their first month: Mar-May is "mar", Jun-Aug is "jun", Sep-Nov is "sep"
    and Dec-Feb is "dec" (January and February belong to "dec" of the previous year).

    Args:
        id (str): The ABS id for the datafile. For Australian Consumer Price Index (CPI) the ID is 640101.
        date (datetime, optional): The date before which the CPI data should be valid.
            If not provided, then it uses today's date to get the latest file.
        local_path (Path, str, optional): The path to where the file should be downloaded.
            If None, then it is downloaded in the user's cache directory.
        force (bool): Whether or not the file should be forced to download again even if present in the local path.
            Default False.

    Returns:
        Path: The path to the cached ABS datafile.
            None if no file could be obtained for any quarter after 1 January 1948.
    """
    date = date or datetime.now()
    earliest_date = datetime(1948, 1, 1)

    while date > earliest_date:
        year = date.year
        quarter_index = (date.month - 3) // 3
        if quarter_index == -1:
            # January and February belong to the "dec" quarter of the previous year.
            quarter_index = 3
            year -= 1
        quarter = ACCEPTED_QUARTERS[quarter_index]

        try:
            return cached_download_abs_excel(id, quarter, year, local_path=local_path, force=force)
        except (DownloadError, IOError):
            print(f"WARNING: CPI data for Quarter {quarter.title()} {year} not yet available.", file=sys.stderr)

        # Move to the day before this quarter's first month so the next attempt is always the
        # previous quarter. A fixed step in days can land in the same quarter a second time.
        date = datetime(year, 3 * (quarter_index + 1), 1) - timedelta(days=1)

    return None

175 

176 

def cached_download_cpi(
    *, date: Union[datetime, None] = None, local_path: Union[Path, str, None] = None, force: bool = False
) -> Path:
    """
    Returns the path to the latest cached file with the Australian Consumer Price Index (CPI) data.

    This delegates to `cached_download_abs_excel_by_date` with the ABS id of the CPI file, "640101",
    which downloads the file if it does not exist already. All arguments are keyword-only.

    Args:
        date (datetime, optional): The date before which the CPI data should be valid.
            If not provided, then today's date is used to get the latest file.
        local_path (Path, str, optional): The path to where the file should be downloaded.
            If None, then it is downloaded in the user's cache directory.
        force (bool): Whether or not the file should be forced to download again even if present in the local path.
            Default False.

    Returns:
        Path: The path to the cached datafile.
    """
    # "640101" is the ABS id of the CPI datafile.
    return cached_download_abs_excel_by_date(
        id="640101",
        date=date,
        local_path=local_path,
        force=force,
    )