1 #!/usr/bin/python2.4
   2 #
   3 # CDDL HEADER START
   4 #
   5 # The contents of this file are subject to the terms of the
   6 # Common Development and Distribution License (the "License").
   7 # You may not use this file except in compliance with the License.
   8 #
   9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10 # or http://www.opensolaris.org/os/licensing.
  11 # See the License for the specific language governing permissions
  12 # and limitations under the License.
  13 #
  14 # When distributing Covered Code, include this CDDL HEADER in each
  15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16 # If applicable, add the following below this CDDL HEADER, with the
  17 # fields enclosed by brackets "[]" replaced with your own identifying
  18 # information: Portions Copyright [yyyy] [name of copyright owner]
  19 #
  20 # CDDL HEADER END
  21 #
  22 
  23 #
  24 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  25 # Use is subject to license terms.
  26 #
  27 
  28 import os
  29 import urllib
  30 import urllib2
  31 import httplib
  32 import socket
  33 import time
  34 import sha
  35 from tarfile import ReadError
  36 
  37 import pkg.pkgtarfile as ptf
  38 import pkg.portable as portable
  39 import pkg.fmri
  40 from pkg.misc import versioned_urlopen
  41 from pkg.misc import hash_file_name
  42 from pkg.misc import get_pkg_otw_size
  43 from pkg.misc import TransferTimedOutException
  44 from pkg.misc import TransferContentException
  45 from pkg.misc import InvalidContentException
  46 from pkg.misc import MAX_TIMEOUT_COUNT
  47 from pkg.misc import retryable_http_errors
  48 
class FileList(object):
        """A FileList maintains mappings between files and Actions.
        The list is built with knowledge of the Image and the PackagePlan's
        associated actions.

        The FileList is responsible for downloading the files needed by the
        PkgPlan from the repository.  Once downloaded, the FileList generates
        the appropriate opener for the actions that it processed.  By
        downloading files in a group, it is possible to achieve better
        performance.  This is because the FileList asks for the files to be
        sent in groups, instead of issuing individual HTTP GETs.

        The caller may limit the maximum number of bytes of content in a
        FileList by specifying maxbytes when the object is constructed.
        If the caller sets maxbytes to 0, the size of the list is assumed
        to be infinite."""

        #
        # This value can be tuned by external callers to adjust the
        # default "maxbytes" value for a file list.  This value should be
        # tuned to the lowest value which provides "good enough" performance;
        # tuning beyond 1MB has not, in our experiments thus far, yielded
        # more than a token speedup -- at the expense of interactivity.
        #
        # The value is expressed in bytes (1024 * 1024 == 1MB) and is used
        # whenever the constructor is called with maxbytes=None.
        #
        maxbytes_default = 1024 * 1024
  74 
  75         def __init__(self, image, fmri, progtrack, maxbytes=None):
  76                 """
  77                 Create a FileList object for the specified image and pkgplan.
  78                 """
  79 
  80                 self.image = image
  81                 self.fmri = fmri
  82                 self.progtrack = progtrack
  83                 self.fhash = { }
  84 
  85                 if maxbytes is None:
  86                         self.maxbytes = FileList.maxbytes_default
  87                 else:
  88                         self.maxbytes = maxbytes
  89 
  90                 self.actual_bytes = 0
  91                 self.actual_nfiles = 0
  92                 self.effective_bytes = 0
  93                 self.effective_nfiles = 0
  94 
  95                 if fmri:
  96                         auth, pkg_name, version = self.fmri.tuple()
  97 
  98                         self.authority = pkg.fmri.strip_auth_pfx(auth)
  99                         self.ssl_tuple = self.image.get_ssl_credentials(auth)
 100                         self.uuid = self.image.get_uuid(self.authority)
 101                 else:
 102                         self.authority = None
 103                         self.ssl_tuple = None
 104                         self.uuid = None
 105 
 106                 self.ds = None
 107                 self.url = None
 108 
 109         def add_action(self, action):
 110                 """Add the specified action to the filelist.  The action
 111                 must name a file that can be retrieved from the repository.
 112 
 113                 This method will pull cached content from the download
 114                 directory, if it's available."""
 115 
 116                 # Check if we've got a cached version of the file before
 117                 # trying to add it to the list.  If a cached version is present,
 118                 # create the opener and return.
 119 
 120                 hashval = action.hash
 121                 cache_path = os.path.normpath(os.path.join(
 122                     self.image.cached_download_dir(),
 123                     hash_file_name(hashval)))
 124 
 125                 try:
 126                         if os.path.exists(cache_path):
 127                                 action.data = self._make_opener(cache_path)
 128                                 bytes = get_pkg_otw_size(action)
 129 
 130                                 self._verify_content(action, cache_path)
 131                                 self.progtrack.download_add_progress(1, bytes)
 132 
 133                                 return
 134                 except InvalidContentException:
 135                         # If the content in the cache doesn't match the hash of
 136                         # the action, verify will have already purged the item
 137                         # from the cache.  Reset action.data to None and have
 138                         # _add_action download the file.
 139                         action.data = None
 140 
 141                 while self._is_full():
 142                         self._do_get_files()
 143 
 144                 self._add_action(action)
 145                 
 146         def _add_action(self, action):
 147                 """Add the specified action to the filelist.  The action
 148                 must name a file that can be retrieved from the repository.
 149 
 150                 This method gets invoked when we must go over the network
 151                 to retrieve file content.
 152 
 153                 This is a private method which performs the majority of the
 154                 work for add_content()."""
 155 
 156                 if not hasattr(action, "hash"):
 157                         raise FileListException, "Invalid action type"
 158 
 159                 if self._is_full():
 160                         raise FileListFullException
 161 
 162                 hashval = action.hash
 163 
 164                 # Each fhash key accesses a list of one or more actions.  If we
 165                 # already have a key in the dictionary, get the list and append
 166                 # the action to it.  Otherwise, create a new list with the first
 167                 # action.
 168                 if hashval in self.fhash:
 169                         l = self.fhash[hashval]
 170                         l.append(action)
 171                 else:
 172                         self.fhash[hashval] = [ action ]
 173                         self.actual_nfiles += 1
 174                         self.actual_bytes += get_pkg_otw_size(action)
 175 
 176                 # Regardless of whether files map to the same hash, we
 177                 # also track the total (effective) size and number of entries
 178                 # in the flist, for reporting purposes.
 179                 self.effective_nfiles += 1
 180                 self.effective_bytes += get_pkg_otw_size(action)
 181 
 182         def _clear_mirror(self):
 183                 """Clear any selected DepotStatus and URL assocated with
 184                 a mirror selection."""
 185 
 186                 self.ds = None
 187                 self.url = None
 188 
 189         def _del_hash(self, hash):
 190                 """Given the supplied content hash, remove the entry
 191                 from the flist's dictionary and adjust the counters
 192                 accordingly."""
 193 
 194                 try:
 195                         act_list = self.fhash[hash]
 196                 except KeyError:
 197                         return
 198 
 199                 pkgsz = get_pkg_otw_size(act_list[0])
 200                 nactions = len(act_list)
 201         
 202                 # Update the actual counts by subtracting the first
 203                 # item in the list
 204                 self.actual_nfiles -= 1
 205                 self.actual_bytes -= pkgsz
 206 
 207                 # Now update effective count
 208                 self.effective_nfiles -= nactions
 209                 self.effective_bytes -= nactions * pkgsz
 210 
 211                 # Now delete the entry out of the dictionary
 212                 del self.fhash[hash] 
 213 
 214 
 215         # XXX detect missing size and warn
 216 
 217         def _do_get_files(self):
 218                 """A wrapper around _get_files.  This handles exceptions
 219                 that might occur and deals with timeouts."""
 220 
 221                 retry_count = MAX_TIMEOUT_COUNT
 222                 files_extracted = 0
 223 
 224                 nfiles = self._get_nfiles()
 225                 nbytes = self._get_nbytes()
 226                 chosen_mirrors = set()
 227                 ts = 0
 228 
 229                 while files_extracted == 0:
 230                         try:
 231                                 self._pick_mirror(chosen_mirrors)
 232                                 ts = time.time()
 233 
 234                                 fe = self._get_files()
 235                                 files_extracted += fe
 236 
 237                         except (TransferTimedOutException,
 238                             TransferContentException, InvalidContentException):
 239 
 240                                 retry_count -= 1
 241                                 self.ds.record_error()
 242                                 self._clear_mirror()
 243 
 244                                 if retry_count <= 0:
 245                                         raise TransferTimedOutException
 246                         else:
 247                                 ts = time.time() - ts
 248                                 self.ds.record_success(ts)
 249 
 250                 nfiles -= self._get_nfiles()
 251                 nbytes -= self._get_nbytes()
 252                 self.progtrack.download_add_progress(nfiles, nbytes)
 253 
 254         def _extract_file(self, tarinfo, tar_stream, download_dir):
 255                 """Given a tarinfo object, extract that onto the filesystem
 256                 so it can be installed."""
 257 
 258                 completed_dir = self.image.cached_download_dir()
 259 
 260                 hashval = tarinfo.name
 261 
 262                 # Set the perms of the temporary file. The file must
 263                 # be writable so that the mod time can be changed on Windows
 264                 tarinfo.mode = 0600
 265                 tarinfo.uname = "root"
 266                 tarinfo.gname = "root"
 267 
 268                 # XXX catch IOError if tar stream closes inadvertently?
 269                 tar_stream.extract_to(tarinfo, download_dir, hashval)
 270 
 271                 # Now that the file has been successfully extracted, move
 272                 # it to the cached content directory.
 273                 final_path = os.path.normpath(os.path.join(completed_dir,
 274                     hash_file_name(hashval)))
 275 
 276                 if not os.path.exists(os.path.dirname(final_path)):
 277                         os.makedirs(os.path.dirname(final_path))
 278 
 279                 portable.rename(os.path.join(download_dir, hashval), final_path)
 280 
 281                 # assign opener to actions in the list
 282                 try:
 283                         l = self.fhash[hashval]
 284                 except KeyError:
 285                         # If the key isn't in the dictionary, the server sent us
 286                         # a file we didn't ask for.  In this case, we can't
 287                         # create an opener for it, nor should we leave it in the
 288                         # cache.
 289                         os.remove(final_path)
 290                         return
 291 
 292                 self._verify_content(l[0], final_path)
 293 
 294                 for action in l:
 295                         action.data = self._make_opener(final_path)
 296 
 297                 # Remove successfully extracted items from the hash
 298                 # and adjust bean counters
 299                 self._del_hash(hashval)
 300 
 301 
 302         def flush(self):
 303                 """Ensure that the actions added to the filelist have had
 304                 their data retrieved from the depot."""
 305                 while self._list_size() > 0:
 306                         self._do_get_files()
 307 
        def _get_files(self):
                """Request every hash currently in the list from self.url in
                a single "filelist" operation and extract the tar stream
                that comes back.

                Returns the number of tar members that were extracted.

                Raises FileListException if versioned_urlopen raises
                RuntimeError (reported as missing server-side support),
                TransferTimedOutException on a retryable HTTP error or a
                socket timeout, TransferContentException if the tar stream
                cannot be read, and RuntimeError if the incoming download
                directory cannot be created.  Other HTTPError and URLError
                exceptions are re-raised unchanged.  Exceptions raised by
                _extract_file propagate to the caller as well."""

                req_dict = { }
                tar_stream = None
                files_extracted = 0

                url_prefix = self.url

                download_dir = self.image.incoming_download_dir()
                # Make sure the download directory is there before we start
                # retrieving and extracting files.
                try:
                        if not os.path.exists(download_dir):
                                os.makedirs(download_dir)
                except OSError, (errno, errorstr):
                        raise RuntimeError("unable to create " \
                                "download directory %s: %s" % 
                                (download_dir, errorstr))

                # Build the request body: one "File-Name-<n>" field per
                # content hash in the list.
                for i, k in enumerate(self.fhash.keys()):
                        fstr = "File-Name-%s" % i
                        req_dict[fstr] = k

                req_str = urllib.urlencode(req_dict)

                try:
                        # NOTE(review): [0] looks like the list of acceptable
                        # operation versions -- confirm against
                        # versioned_urlopen.  The returned version (v) is
                        # not used here.
                        f, v = versioned_urlopen(url_prefix, "filelist", [0],
                            data=req_str, ssl_creds=self.ssl_tuple,
                            imgtype=self.image.type, uuid=self.uuid)
                except RuntimeError:
                        raise FileListException, "No server-side support" 
                except urllib2.HTTPError, e:
                        # Must check for HTTPError before URLError
                        if e.code in retryable_http_errors:
                                raise TransferTimedOutException
                        raise
                except urllib2.URLError, e:
                        # A URLError that wraps a socket timeout is turned
                        # into a transfer timeout after the image has cleaned
                        # up its downloads; anything else is passed on.
                        if len(e.args) == 1 and \
                            isinstance(e.args[0], socket.timeout):
                                self.image.cleanup_downloads()
                                raise TransferTimedOutException
                        raise

                # Exception handling here is a bit complicated.  The finally
                # block makes sure we always close our file objects.  If we get
                # a socket.timeout we may have gotten an error in the middle of
                # downloading a file. In that case, delete the incoming files we
                # were processing.  They were not successfully retrieved.
                try:
                        try:
                                # "r|" opens the response as a
                                # non-seekable stream of tar blocks.
                                tar_stream = ptf.PkgTarFile.open(mode = "r|",
                                    fileobj = f)
                                for info in tar_stream:
                                        self._extract_file(info, tar_stream,
                                            download_dir)
                                        files_extracted += 1
                        except socket.timeout:
                                self.image.cleanup_downloads()
                                raise TransferTimedOutException
                        except ReadError:
                                raise TransferContentException

                finally:
                        # tar_stream is still None if opening it failed.
                        if tar_stream:
                                tar_stream.close()
                        f.close()

                return files_extracted
 382 
 383         def _get_nbytes(self):
 384                 return self.effective_bytes
 385 
 386         def _get_nfiles(self):
 387                 return self.effective_nfiles
 388 
 389         def _is_full(self):
 390                 """Returns true if the FileList object has filled its
 391                 allocated slots and can no longer accept new actions."""
 392 
 393                 if self.maxbytes > 0 and self.actual_bytes >= self.maxbytes:
 394                         return True
 395 
 396                 return False
 397 
 398         def _list_size(self):
 399                 """Returns the current number of files in the filelist."""
 400 
 401                 return len(self.fhash)
 402 
 403         @staticmethod
 404         def _make_opener(filepath):
 405                 def opener():
 406                         f = open(filepath, "rb")
 407                         return f
 408                 return opener                                
 409 
 410         def _pick_mirror(self, chosen_set=None):
 411                 """If we don't already have a DepotStatus or a URL,
 412                 select a mirror, populate the DepotStatus, and choose a URL."""
 413 
 414                 if self.ds and self.url:
 415                         return
 416                 elif self.ds:
 417                         self.url = self.ds.url
 418                 else:
 419                         self.ds = self.image.select_mirror(self.authority,
 420                             chosen_set)
 421                         self.url = self.ds.url
 422                         chosen_set.add(self.ds)
 423 
 424         @staticmethod
 425         def _verify_content(action, filepath):
 426                 """If action contains an attribute that has the compressed
 427                 hash, read the file specified in filepath and verify
 428                 that the hash values match.  If the values do not match,
 429                 remove the file and raise an InvalidContentException."""
 430 
 431                 chash = action.attrs.get("chash", None)
 432                 if not chash:
 433                         return
 434 
 435                 cfile = open(filepath, "rb")
 436                 cdata = cfile.read()
 437                 cfile.close()
 438                 hashobj = sha.new(cdata)
 439                 newhash = hashobj.hexdigest()
 440                 cdata = None
 441 
 442                 if chash != newhash:
 443                        os.remove(filepath)
 444                        raise InvalidContentException(action, newhash)
 445 
 446 
 447 class FileListException(Exception):
 448         def __init__(self, args=None):
 449                 self.args = args
 450 
 451 class FileListFullException(FileListException):
 452         pass