1 #!/usr/bin/python2.4
2 #
3 # CDDL HEADER START
4 #
5 # The contents of this file are subject to the terms of the
6 # Common Development and Distribution License (the "License").
7 # You may not use this file except in compliance with the License.
8 #
9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 # or http://www.opensolaris.org/os/licensing.
11 # See the License for the specific language governing permissions
12 # and limitations under the License.
13 #
14 # When distributing Covered Code, include this CDDL HEADER in each
15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 # If applicable, add the following below this CDDL HEADER, with the
17 # fields enclosed by brackets "[]" replaced with your own identifying
18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 #
20 # CDDL HEADER END
21 #
22
23 #
24 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
25 # Use is subject to license terms.
26 #
27
28 import os
29 import urllib
30 import urllib2
31 import httplib
32 import socket
33 import time
34 import sha
35 from tarfile import ReadError
36
37 import pkg.pkgtarfile as ptf
38 import pkg.portable as portable
39 import pkg.fmri
40 from pkg.misc import versioned_urlopen
41 from pkg.misc import hash_file_name
42 from pkg.misc import get_pkg_otw_size
43 from pkg.misc import TransferTimedOutException
44 from pkg.misc import TransferContentException
45 from pkg.misc import InvalidContentException
46 from pkg.misc import MAX_TIMEOUT_COUNT
47 from pkg.misc import retryable_http_errors
48
49 class FileList(object):
50 """A FileList maintains mappings between files and Actions.
51 The list is built with knowledge of the Image and the PackagePlan's
52 associated actions.
53
54 The FileList is responsible for downloading the files needed by the
55 PkgPlan from the repository. Once downloaded, the FileList generates
56 the appropriate opener and closer for the actions that it processed. By
57 downloading files in a group, it is possible to achieve better
58 performance. This is because the FileList asks for the files to be
59 sent in groups, instead of individual HTTP GET's.
60
61 The caller may limit the maximum number of bytes of content in a
62 FileList by specifying maxbytes when the object is constructed.
63 If the caller sets maxbytes to 0, the size of the list is assumed
64 to be infinite."""
65
66 #
67 # This value can be tuned by external callers to adjust the
68 # default "maxbytes" value for a file list. This value should be
69 # tuned to the lowest value which provides "good enough" performance;
70 # tuning beyond 1MB has not in our experiments thus far yielded more
71 # than a token speedup-- at the expense of interactivity.
72 #
73 maxbytes_default = 1024 * 1024
74
75 def __init__(self, image, fmri, progtrack, maxbytes=None):
76 """
77 Create a FileList object for the specified image and pkgplan.
78 """
79
80 self.image = image
81 self.fmri = fmri
82 self.progtrack = progtrack
83 self.fhash = { }
84
85 if maxbytes is None:
86 self.maxbytes = FileList.maxbytes_default
87 else:
88 self.maxbytes = maxbytes
89
90 self.actual_bytes = 0
91 self.actual_nfiles = 0
92 self.effective_bytes = 0
93 self.effective_nfiles = 0
94
95 if fmri:
96 auth, pkg_name, version = self.fmri.tuple()
97
98 self.authority = pkg.fmri.strip_auth_pfx(auth)
99 self.ssl_tuple = self.image.get_ssl_credentials(auth)
100 self.uuid = self.image.get_uuid(self.authority)
101 else:
102 self.authority = None
103 self.ssl_tuple = None
104 self.uuid = None
105
106 self.ds = None
107 self.url = None
108
def add_action(self, action):
    """Make the content named by action.hash available to the action.

    If the content is already in the image's cached download
    directory and passes verification, the action is given an opener
    for the cached file, progress is reported for one file, and
    nothing is queued.  Otherwise the action is queued for download;
    while the list is full, pending downloads are performed first to
    make room."""

    hashval = action.hash
    cached = os.path.normpath(os.path.join(
        self.image.cached_download_dir(),
        hash_file_name(hashval)))

    if os.path.exists(cached):
        action.data = self._make_opener(cached)
        try:
            size = get_pkg_otw_size(action)

            self._verify_content(action, cached)
            self.progtrack.download_add_progress(1, size)
        except InvalidContentException:
            # _verify_content has already removed the bad file from
            # the cache; drop the opener and fall through so that
            # the content is downloaded instead.
            action.data = None
        else:
            return

    # Drain the list until there is room for one more entry.
    while self._is_full():
        self._do_get_files()

    self._add_action(action)
145
def _add_action(self, action):
    """Queue an action whose content must come over the network.

    This is the private worker behind add_action().  It raises
    FileListException when the action has no "hash" attribute and
    FileListFullException when the list cannot take another entry."""

    if not hasattr(action, "hash"):
        raise FileListException("Invalid action type")

    if self._is_full():
        raise FileListFullException

    hashval = action.hash

    # fhash maps each hash to the list of actions wanting it.  Only
    # the first action seen for a hash adds to the "actual" totals,
    # since the content is downloaded once per hash.
    try:
        self.fhash[hashval].append(action)
    except KeyError:
        self.fhash[hashval] = [ action ]
        self.actual_nfiles += 1
        self.actual_bytes += get_pkg_otw_size(action)

    # The "effective" totals count every action, shared hash or not;
    # they are what gets reported as progress.
    self.effective_nfiles += 1
    self.effective_bytes += get_pkg_otw_size(action)
181
182 def _clear_mirror(self):
183 """Clear any selected DepotStatus and URL assocated with
184 a mirror selection."""
185
186 self.ds = None
187 self.url = None
188
189 def _del_hash(self, hash):
190 """Given the supplied content hash, remove the entry
191 from the flist's dictionary and adjust the counters
192 accordingly."""
193
194 try:
195 act_list = self.fhash[hash]
196 except KeyError:
197 return
198
199 pkgsz = get_pkg_otw_size(act_list[0])
200 nactions = len(act_list)
201
202 # Update the actual counts by subtracting the first
203 # item in the list
204 self.actual_nfiles -= 1
205 self.actual_bytes -= pkgsz
206
207 # Now update effective count
208 self.effective_nfiles -= nactions
209 self.effective_bytes -= nactions * pkgsz
210
211 # Now delete the entry out of the dictionary
212 del self.fhash[hash]
213
214
215 # XXX detect missing size and warn
216
def _do_get_files(self):
    """Run _get_files() until it delivers at least one file, retrying
    on failure.

    A timeout, a content error, or a response that contains no files
    at all counts as a failed attempt: the error is recorded against
    the current DepotStatus, the mirror selection is cleared so that
    another one is picked, and the request is retried.  After
    MAX_TIMEOUT_COUNT failed attempts TransferTimedOutException is
    raised.

    On success the elapsed time is recorded against the DepotStatus
    and the progress tracker is told how many files and bytes left
    the list."""

    retry_count = MAX_TIMEOUT_COUNT
    files_extracted = 0

    # Totals before the download; the difference afterwards is what
    # this call retired from the list.
    nfiles = self._get_nfiles()
    nbytes = self._get_nbytes()
    chosen_mirrors = set()

    while files_extracted == 0:
        try:
            self._pick_mirror(chosen_mirrors)
            ts = time.time()

            files_extracted += self._get_files()

            if files_extracted == 0:
                # An archive that parses cleanly but has no members
                # raises nothing by itself.  Without this check the
                # loop would re-request from the same mirror forever,
                # so treat it like any other bad response.
                raise TransferContentException

        except (TransferTimedOutException,
            TransferContentException, InvalidContentException):

            retry_count -= 1
            self.ds.record_error()
            self._clear_mirror()

            if retry_count <= 0:
                raise TransferTimedOutException
        else:
            self.ds.record_success(time.time() - ts)

    nfiles -= self._get_nfiles()
    nbytes -= self._get_nbytes()
    self.progtrack.download_add_progress(nfiles, nbytes)
253
254 def _extract_file(self, tarinfo, tar_stream, download_dir):
255 """Given a tarinfo object, extract that onto the filesystem
256 so it can be installed."""
257
258 completed_dir = self.image.cached_download_dir()
259
260 hashval = tarinfo.name
261
262 # Set the perms of the temporary file. The file must
263 # be writable so that the mod time can be changed on Windows
264 tarinfo.mode = 0600
265 tarinfo.uname = "root"
266 tarinfo.gname = "root"
267
268 # XXX catch IOError if tar stream closes inadvertently?
269 tar_stream.extract_to(tarinfo, download_dir, hashval)
270
271 # Now that the file has been successfully extracted, move
272 # it to the cached content directory.
273 final_path = os.path.normpath(os.path.join(completed_dir,
274 hash_file_name(hashval)))
275
276 if not os.path.exists(os.path.dirname(final_path)):
277 os.makedirs(os.path.dirname(final_path))
278
279 portable.rename(os.path.join(download_dir, hashval), final_path)
280
281 # assign opener to actions in the list
282 try:
283 l = self.fhash[hashval]
284 except KeyError:
285 # If the key isn't in the dictionary, the server sent us
286 # a file we didn't ask for. In this case, we can't
287 # create an opener for it, nor should we leave it in the
288 # cache.
289 os.remove(final_path)
290 return
291
292 self._verify_content(l[0], final_path)
293
294 for action in l:
295 action.data = self._make_opener(final_path)
296
297 # Remove successfully extracted items from the hash
298 # and adjust bean counters
299 self._del_hash(hashval)
300
301
302 def flush(self):
303 """Ensure that the actions added to the filelist have had
304 their data retrieved from the depot."""
305 while self._list_size() > 0:
306 self._do_get_files()
307
308 def _get_files(self):
309 """Instruct the FileList object to download the files
310 for the actions that have been associated with this object.
311
312 This routine will raise a FileListException if the server
313 does not support filelist. Callers of get_files should
314 consider catching this exception."""
315
316 req_dict = { }
317 tar_stream = None
318 files_extracted = 0
319
320 url_prefix = self.url
321
322 download_dir = self.image.incoming_download_dir()
323 # Make sure the download directory is there before we start
324 # retrieving and extracting files.
325 try:
326 if not os.path.exists(download_dir):
327 os.makedirs(download_dir)
328 except OSError, (errno, errorstr):
329 raise RuntimeError("unable to create " \
330 "download directory %s: %s" %
331 (download_dir, errorstr))
332
333 for i, k in enumerate(self.fhash.keys()):
334 fstr = "File-Name-%s" % i
335 req_dict[fstr] = k
336
337 req_str = urllib.urlencode(req_dict)
338
339 try:
340 f, v = versioned_urlopen(url_prefix, "filelist", [0],
341 data=req_str, ssl_creds=self.ssl_tuple,
342 imgtype=self.image.type, uuid=self.uuid)
343 except RuntimeError:
344 raise FileListException, "No server-side support"
345 except urllib2.HTTPError, e:
346 # Must check for HTTPError before URLError
347 if e.code in retryable_http_errors:
348 raise TransferTimedOutException
349 raise
350 except urllib2.URLError, e:
351 if len(e.args) == 1 and \
352 isinstance(e.args[0], socket.timeout):
353 self.image.cleanup_downloads()
354 raise TransferTimedOutException
355 raise
356
357 # Exception handling here is a bit complicated. The finally
358 # block makes sure we always close our file objects. If we get
359 # a socket.timeout we may have gotten an error in the middle of
360 # downloading a file. In that case, delete the incoming files we
361 # were processing. They were not successfully retrieved.
362 try:
363 try:
364 tar_stream = ptf.PkgTarFile.open(mode = "r|",
365 fileobj = f)
366 for info in tar_stream:
367 self._extract_file(info, tar_stream,
368 download_dir)
369 files_extracted += 1
370 except socket.timeout:
371 self.image.cleanup_downloads()
372 raise TransferTimedOutException
373 except ReadError:
374 raise TransferContentException
375
376 finally:
377 if tar_stream:
378 tar_stream.close()
379 f.close()
380
381 return files_extracted
382
383 def _get_nbytes(self):
384 return self.effective_bytes
385
386 def _get_nfiles(self):
387 return self.effective_nfiles
388
389 def _is_full(self):
390 """Returns true if the FileList object has filled its
391 allocated slots and can no longer accept new actions."""
392
393 if self.maxbytes > 0 and self.actual_bytes >= self.maxbytes:
394 return True
395
396 return False
397
398 def _list_size(self):
399 """Returns the current number of files in the filelist."""
400
401 return len(self.fhash)
402
403 @staticmethod
404 def _make_opener(filepath):
405 def opener():
406 f = open(filepath, "rb")
407 return f
408 return opener
409
410 def _pick_mirror(self, chosen_set=None):
411 """If we don't already have a DepotStatus or a URL,
412 select a mirror, populate the DepotStatus, and choose a URL."""
413
414 if self.ds and self.url:
415 return
416 elif self.ds:
417 self.url = self.ds.url
418 else:
419 self.ds = self.image.select_mirror(self.authority,
420 chosen_set)
421 self.url = self.ds.url
422 chosen_set.add(self.ds)
423
424 @staticmethod
425 def _verify_content(action, filepath):
426 """If action contains an attribute that has the compressed
427 hash, read the file specified in filepath and verify
428 that the hash values match. If the values do not match,
429 remove the file and raise an InvalidContentException."""
430
431 chash = action.attrs.get("chash", None)
432 if not chash:
433 return
434
435 cfile = open(filepath, "rb")
436 cdata = cfile.read()
437 cfile.close()
438 hashobj = sha.new(cdata)
439 newhash = hashobj.hexdigest()
440 cdata = None
441
442 if chash != newhash:
443 os.remove(filepath)
444 raise InvalidContentException(action, newhash)
445
446
class FileListException(Exception):
    """Base class for errors raised by FileList.

    args, when given, is the message (or other payload) describing
    the error; it becomes the exception's single argument."""

    def __init__(self, args=None):
        # Hand the payload to Exception rather than assigning
        # self.args directly.  Assigning a bare string or None there
        # either splits the message into characters or fails
        # outright, depending on the Python version, and breaks
        # str() on an argument-less instance.
        if args is None:
            Exception.__init__(self)
        else:
            Exception.__init__(self, args)
450
class FileListFullException(FileListException):
    """Raised when an action is added to a FileList that is already
    full."""