James Antill eb9900a
commit 83fcbe745c3ee8f5f0fa29626a86c824db059b22
James Antill eb9900a
Author: James Antill <james@and.org>
James Antill eb9900a
Date:   Thu Feb 7 12:58:07 2013 -0500
James Antill eb9900a
James Antill eb9900a
    Fix problems with mirrors like wtfnix.com, delete bad MD files.
James Antill eb9900a
James Antill eb9900a
diff --git a/yum/yumRepo.py b/yum/yumRepo.py
James Antill eb9900a
index dfcf8f9..efbc42a 100644
James Antill eb9900a
--- a/yum/yumRepo.py
James Antill eb9900a
+++ b/yum/yumRepo.py
James Antill eb9900a
@@ -940,6 +941,7 @@ Insufficient space in download directory %s
James Antill eb9900a
                                     range=(start, end),
James Antill eb9900a
                                     )
James Antill eb9900a
             except URLGrabError, e:
James Antill eb9900a
+                self._del_dl_file(local, size)
James Antill eb9900a
                 errstr = "failed to retrieve %s from %s\nerror was %s" % (relative, self, e)
James Antill eb9900a
                 if self.mirrorurls:
James Antill eb9900a
                     errstr +="\n  You could try running: yum clean expire-cache"
James Antill eb9900a
@@ -961,6 +963,7 @@ Insufficient space in download directory %s
James Antill eb9900a
                                            **kwargs
James Antill eb9900a
                                            )
James Antill eb9900a
             except URLGrabError, e:
James Antill eb9900a
+                self._del_dl_file(local, size)
James Antill eb9900a
                 errstr = "failure: %s from %s: %s" % (relative, self, e)
James Antill eb9900a
                 errors = getattr(e, 'errors', None)
James Antill eb9900a
                 raise Errors.NoMoreMirrorsRepoError(errstr, errors)
James Antill eb9900a
@@ -1652,6 +1655,18 @@ Insufficient space in download directory %s
James Antill eb9900a
             raise URLGrabError(-1, 'repomd.xml does not match metalink for %s' %
James Antill eb9900a
                                self)
James Antill eb9900a
 
James Antill eb9900a
+    def _del_dl_file(self, local, size):
James Antill eb9900a
+        """ Delete a downloaded file unless it's still smaller than size. """
James Antill eb9900a
+
James Antill eb9900a
+        sd = misc.stat_f(local)
James Antill eb9900a
+        if not sd: # File doesn't exist...
James Antill eb9900a
+            return
James Antill eb9900a
+
James Antill eb9900a
+        if size and sd.st_size < size:
James Antill eb9900a
+            return # Still more to get...
James Antill eb9900a
+
James Antill eb9900a
+        # Is the correct size, or too big ... delete it so we'll try again.
James Antill eb9900a
+        misc.unlink_f(local)
James Antill eb9900a
 
James Antill eb9900a
     def checkMD(self, fn, mdtype, openchecksum=False):
James Antill eb9900a
         """check the metadata type against its checksum"""
James Antill eb9900a
@@ -1681,7 +1696,7 @@ Insufficient space in download directory %s
James Antill eb9900a
         if size is not None:
James Antill eb9900a
             size = int(size)
James Antill eb9900a
 
James Antill eb9900a
-        if fast:
James Antill eb9900a
+        if fast and skip_old_DBMD_check:
James Antill eb9900a
             fsize = misc.stat_f(file)
James Antill eb9900a
             if fsize is None: # File doesn't exist...
James Antill eb9900a
                 return None
James Antill eb9900a
@@ -1756,16 +1771,21 @@ Insufficient space in download directory %s
James Antill eb9900a
 
James Antill eb9900a
         try:
James Antill eb9900a
             def checkfunc(obj):
James Antill eb9900a
-                self.checkMD(obj, mdtype)
James Antill eb9900a
+                try:
James Antill eb9900a
+                    self.checkMD(obj, mdtype)
James Antill eb9900a
+                except URLGrabError:
James Antill eb9900a
+                    #  Don't share MD among mirrors, in theory we could use:
James Antill eb9900a
+                    #     self._del_dl_file(local, int(thisdata.size))
James Antill eb9900a
+                    # ...but this is safer.
James Antill eb9900a
+                    misc.unlink_f(obj.filename)
James Antill eb9900a
+                    raise
James Antill eb9900a
                 self.retrieved[mdtype] = 1
James Antill eb9900a
             text = "%s/%s" % (self, mdtype)
James Antill eb9900a
             if thisdata.size is None:
James Antill eb9900a
                 reget = None
James Antill eb9900a
             else:
James Antill eb9900a
                 reget = 'simple'
James Antill eb9900a
-                if os.path.exists(local):
James Antill eb9900a
-                    if os.stat(local).st_size >= int(thisdata.size):
James Antill eb9900a
-                        misc.unlink_f(local)
James Antill eb9900a
+                self._del_dl_file(local, int(thisdata.size))
James Antill eb9900a
             local = self._getFile(relative=remote,
James Antill eb9900a
                                   local=local, 
James Antill eb9900a
                                   copy_local=1,
James Antill eb9900a
commit c148eb10b798270b3d15087433c8efb2a79a69d0
James Antill eb9900a
Author: James Antill <james@and.org>
James Antill eb9900a
Date:   Mon Feb 18 16:17:06 2013 -0500
James Antill eb9900a
James Antill eb9900a
    Use xattr data as well as file size for "fast checksumming".
James Antill eb9900a
James Antill eb9900a
diff --git a/yum/yumRepo.py b/yum/yumRepo.py
James Antill eb9900a
index efbc42a..8c38093 100644
James Antill eb9900a
--- a/yum/yumRepo.py
James Antill eb9900a
+++ b/yum/yumRepo.py
James Antill eb9900a
@@ -52,15 +52,54 @@ import stat
James Antill eb9900a
 import errno
James Antill eb9900a
 import tempfile
James Antill eb9900a
 
James Antill eb9900a
-#  If you want yum to _always_ check the MD .sqlite files then set this to
James Antill eb9900a
-# False (this doesn't affect .xml files or .sqilte files derived from them).
James Antill eb9900a
-# With this as True yum will only check when a new repomd.xml or
James Antill eb9900a
-# new MD is downloaded.
James Antill eb9900a
-#  Note that with atomic MD, we can't have old MD lying around anymore so
James Antill eb9900a
-# the only way we need this check is if someone does something like:
James Antill eb9900a
-#   cp primary.sqlite /var/cache/yum/blah
James Antill eb9900a
-# ...at which point you lose.
James Antill eb9900a
-skip_old_DBMD_check = True
James Antill eb9900a
+# This is unused now. Probably nothing else uses it, but it was global/public.
James Antill eb9900a
+skip_old_DBMD_check = False
James Antill eb9900a
+
James Antill eb9900a
+try:
James Antill eb9900a
+    import xattr
James Antill eb9900a
+    if not hasattr(xattr, 'get') or not hasattr(xattr, 'set'):
James Antill eb9900a
+        xattr = None # This is a "newer" API.
James Antill eb9900a
+except ImportError:
James Antill eb9900a
+    xattr = None
James Antill eb9900a
+
James Antill eb9900a
+#  The problem we are trying to solve here is that:
James Antill eb9900a
+#
James Antill eb9900a
+# 1. We rarely want to be downloading MD/pkgs/etc.
James Antill eb9900a
+# 2. We want to check those files are valid (match checksums) when we do
James Antill eb9900a
+#    download them.
James Antill eb9900a
+# 3. We _really_ don't want to checksum all the files every time we
James Antill eb9900a
+#    run (100s of MBs).
James Antill eb9900a
+# 4. We can continue to download files from bad mirrors, or retry files due to
James Antill eb9900a
+#    C-c etc.
James Antill eb9900a
+#
James Antill eb9900a
+# ...we used to solve this by just checking the file size, and assuming the
James Antill eb9900a
+# files had been downloaded and checksummed as correct if that matched. But that
James Antill eb9900a
+# was error prone on bad mirrors, so now we store the checksum in an
James Antill eb9900a
+# xattr ... this does mean that if you can't store xattrs (e.g. NFS) you will
James Antill eb9900a
+# rechecksum everything constantly.
James Antill eb9900a
+
James Antill eb9900a
+def _xattr_get_chksum(filename, chktype):
James Antill eb9900a
+    if not xattr:
James Antill eb9900a
+        return None
James Antill eb9900a
+
James Antill eb9900a
+    try:
James Antill eb9900a
+        ret = xattr.get(filename, 'user.yum.checksum.' + chktype)
James Antill eb9900a
+    except: # Documented to be "EnvironmentError", but make sure
James Antill eb9900a
+        return None
James Antill eb9900a
+
James Antill eb9900a
+    return ret
James Antill eb9900a
+
James Antill eb9900a
+def _xattr_set_chksum(filename, chktype, chksum):
James Antill eb9900a
+    if not xattr:
James Antill eb9900a
+        return None
James Antill eb9900a
+
James Antill eb9900a
+    try:
James Antill eb9900a
+        xattr.set(filename, 'user.yum.checksum.' + chktype, chksum)
James Antill eb9900a
+    except:
James Antill eb9900a
+        return False # Data too long. = IOError ... ignore everything.
James Antill eb9900a
+
James Antill eb9900a
+    return True
James Antill eb9900a
+
James Antill eb9900a
 
James Antill eb9900a
 warnings.simplefilter("ignore", Errors.YumFutureDeprecationWarning)
James Antill eb9900a
 
James Antill eb9900a
@@ -228,7 +267,7 @@ class YumPackageSack(packageSack.PackageSack):
James Antill eb9900a
         # get rid of all this stuff we don't need now
James Antill eb9900a
         del repo.cacheHandler
James Antill eb9900a
 
James Antill eb9900a
-    def _check_uncompressed_db_gen(self, repo, mdtype, fast=True):
James Antill eb9900a
+    def _check_uncompressed_db_gen(self, repo, mdtype):
James Antill eb9900a
         """return file name of db in gen/ dir if good, None if not"""
James Antill eb9900a
 
James Antill eb9900a
         mydbdata         = repo.repoXML.getData(mdtype)
James Antill eb9900a
@@ -238,7 +277,7 @@ class YumPackageSack(packageSack.PackageSack):
James Antill eb9900a
         db_un_fn         = mdtype + '.sqlite'
James Antill eb9900a
 
James Antill eb9900a
         if not repo._checkMD(compressed_fn, mdtype, data=mydbdata,
James Antill eb9900a
-                             check_can_fail=fast, fast=fast):
James Antill eb9900a
+                             check_can_fail=True):
James Antill eb9900a
             return None
James Antill eb9900a
 
James Antill eb9900a
         ret = misc.repo_gen_decompress(compressed_fn, db_un_fn,
James Antill eb9900a
@@ -261,8 +300,7 @@ class YumPackageSack(packageSack.PackageSack):
James Antill eb9900a
         result = None
James Antill eb9900a
 
James Antill eb9900a
         if os.path.exists(db_un_fn):
James Antill eb9900a
-            if skip_old_DBMD_check and repo._using_old_MD:
James Antill eb9900a
-                return db_un_fn
James Antill eb9900a
+
James Antill eb9900a
 
James Antill eb9900a
             try:
James Antill eb9900a
                 repo.checkMD(db_un_fn, mdtype, openchecksum=True)
James Antill eb9900a
@@ -296,7 +334,6 @@ class YumRepository(Repository, config.RepoConf):
James Antill eb9900a
                                               # eventually want
James Antill eb9900a
         self.repoMDFile = 'repodata/repomd.xml'
James Antill eb9900a
         self._repoXML = None
James Antill eb9900a
-        self._using_old_MD = None
James Antill eb9900a
         self._oldRepoMDData = {}
James Antill eb9900a
         self.cache = 0
James Antill eb9900a
         self.mirrorlistparsed = 0
James Antill eb9900a
@@ -1407,7 +1444,6 @@ Insufficient space in download directory %s
James Antill eb9900a
             self._revertOldRepoXML()
James Antill eb9900a
             return False
James Antill eb9900a
 
James Antill eb9900a
-        self._using_old_MD = caching
James Antill eb9900a
         if caching:
James Antill eb9900a
             return False # Skip any work.
James Antill eb9900a
 
James Antill eb9900a
@@ -1673,7 +1709,7 @@ Insufficient space in download directory %s
James Antill eb9900a
         return self._checkMD(fn, mdtype, openchecksum)
James Antill eb9900a
 
James Antill eb9900a
     def _checkMD(self, fn, mdtype, openchecksum=False,
James Antill eb9900a
-                 data=None, check_can_fail=False, fast=False):
James Antill eb9900a
+                 data=None, check_can_fail=False):
James Antill eb9900a
         """ Internal function, use .checkMD() from outside yum. """
James Antill eb9900a
 
James Antill eb9900a
         thisdata = data # So the argument name is nicer
James Antill eb9900a
@@ -1696,17 +1732,15 @@ Insufficient space in download directory %s
James Antill eb9900a
         if size is not None:
James Antill eb9900a
             size = int(size)
James Antill eb9900a
 
James Antill eb9900a
-        if fast and skip_old_DBMD_check:
James Antill eb9900a
+        l_csum = _xattr_get_chksum(file, r_ctype)
James Antill eb9900a
+        if l_csum:
James Antill eb9900a
             fsize = misc.stat_f(file)
James Antill eb9900a
-            if fsize is None: # File doesn't exist...
James Antill eb9900a
-                return None
James Antill eb9900a
-            if size is None:
James Antill eb9900a
-                return 1
James Antill eb9900a
-            if size == fsize.st_size:
James Antill eb9900a
-                return 1
James Antill eb9900a
-            if check_can_fail:
James Antill eb9900a
-                return None
James Antill eb9900a
-            raise URLGrabError(-1, 'Metadata file does not match size')
James Antill eb9900a
+            if fsize is not None: # We just got an xattr, so it should be there
James Antill eb9900a
+                if size is None and l_csum == r_csum:
James Antill eb9900a
+                    return 1
James Antill eb9900a
+                if size == fsize.st_size and l_csum == r_csum:
James Antill eb9900a
+                    return 1
James Antill eb9900a
+            # Anything goes wrong, run the checksums as normal...
James Antill eb9900a
 
James Antill eb9900a
         try: # get the local checksum
James Antill eb9900a
             l_csum = self._checksum(r_ctype, file, datasize=size)
James Antill eb9900a
@@ -1716,6 +1750,7 @@ Insufficient space in download directory %s
James Antill eb9900a
             raise URLGrabError(-3, 'Error performing checksum')
James Antill eb9900a
 
James Antill eb9900a
         if l_csum == r_csum:
James Antill eb9900a
+            _xattr_set_chksum(file, r_ctype, l_csum)
James Antill eb9900a
             return 1
James Antill eb9900a
         else:
James Antill eb9900a
             if check_can_fail: