Tree - rpms/chm2pdf - src.fedoraproject.org

rpms / chm2pdf

Overview Files Commits Branches Forks Releases

Monitoring status:

Orphaned for:

Take

Bugzilla Assignee:

Fedora:: orphan
EPEL:: orphan

Files

Commit: 4db29e80032d4387b9ee2ee5be00586684c09edb

Blob Blame History Raw

--- chm2pdf.orig	2008-07-09 14:42:26.000000000 +0400
+++ chm2pdf	2013-05-18 16:16:48.097014228 +0400
@@ -115,8 +115,14 @@
     urls_list=[]
     for line in flist.readlines()[3:]:
         #print 'line',line
-        spline=line.split()
-        urls_list.append(spline[5])
+        #This won't work if internal paths of the CHM contain spaces: e.g. /doc space/ will only become /doc
+        #spline=line.split()
+        #urls_list.append(spline[5])
+        #this should work better:
+        spline= re.sub(r".*?normal file\s*(.*?)\n$", "\\1", line)
+        if spline[0]=="/":
+          #print "got spline="+spline
+          urls_list.append( spline)
     flist.close()
     # os.remove(CHM2PDF_WORK_DIR+'/urlslist.txt')
     
@@ -148,13 +154,17 @@
 
         img_filename = ''
         for item in objective_urls:
-            if iurl in item:
+            #objective_urls has "real path", whereas image_catcher.imgurls can contain %20!
+            #e.g. item='/doc space/image path/velocity space.gif', iurl='image%20path/velocity%20space.gif'
+            iiurl= re.sub('%20',' ',iurl)
+            if iiurl in item:
                 img_filename=CHM2PDF_ORIG_DIR+item
                 if ';' in img_filename: #hack to get rid of mysterious ; in filenames and urls...
                     img_filename=img_filename.split(';')[0]
         # substitute the new image filenames - but only if an img_filename was found!
         if img_filename:
-            page=re.sub(iurl,img_filename,page)
+            #the r prefix marks a Python "raw string", which does not apply special treatment to backslashes
+            page=re.sub(r'(?i)"'+iurl,'"'+re.sub('\\\\ ', ' ', img_filename),page)
             
 
     # We substitute the CSS URLs of input_file with the *actual* URLs on the CHM2PDF_ORIG_DIR directory
@@ -459,6 +469,10 @@
             page=pf.read()
             pf.close()
     
+            # Some names contain a '%20' (the URL percent-encoding of a space). We substitute a "real space",
+            # otherwise we won't be able to match them to the real files.
+            page = re.sub('%20',' ',page)
+
             # Substitutions in 1st pass: we replace the original filenames with their corresponding "garbled" equivalents.
             for match_string in  match_strings:
                 replace_string = replace_garbled_strings[match_strings.index(match_string)]

rpms / chm2pdf

Source Code

Files