d5fa376
--- chm2pdf.orig	2008-07-09 14:42:26.000000000 +0400
d5fa376
+++ chm2pdf	2013-05-18 16:16:48.097014228 +0400
d5fa376
@@ -115,8 +115,14 @@
d5fa376
     urls_list=[]
d5fa376
     for line in flist.readlines()[3:]:
d5fa376
         #print 'line',line
d5fa376
-        spline=line.split()
d5fa376
-        urls_list.append(spline[5])
d5fa376
+        #This won't work if internal paths of the CHM contain spaces: e.g. "/doc space/" would be truncated to "/doc"
d5fa376
+        #spline=line.split()
d5fa376
+        #urls_list.append(spline[5])
d5fa376
+        #this should work better:
d5fa376
+        spline= re.sub(r".*?normal file\s*(.*?)\n$", "\\1", line)
d5fa376
+        if spline[0]=="/":
d5fa376
+          #print "got spline="+spline
d5fa376
+          urls_list.append( spline)
d5fa376
     flist.close()
d5fa376
     # os.remove(CHM2PDF_WORK_DIR+'/urlslist.txt')
d5fa376
     
d5fa376
@@ -148,13 +154,17 @@
d5fa376
 
d5fa376
         img_filename = ''
d5fa376
         for item in objective_urls:
d5fa376
-            if iurl in item:
d5fa376
+            #objective_urls has "real path", whereas image_catcher.imgurls can contain %20!
d5fa376
+            #e.g. item='/doc space/image path/velocity space.gif', iurl='image%20path/velocity%20space.gif'
d5fa376
+            iiurl= re.sub('%20',' ',iurl)
d5fa376
+            if iiurl in item:
d5fa376
                 img_filename=CHM2PDF_ORIG_DIR+item
d5fa376
                 if ';' in img_filename: #hack to get rid of mysterious ; in filenames and urls...
d5fa376
                     img_filename=img_filename.split(';')[0]
d5fa376
         # substitute the new image filenames - but only if an img_filename was found!
d5fa376
         if img_filename:
d5fa376
-            page=re.sub(iurl,img_filename,page)
d5fa376
+            #the r prefix makes a "raw string", in which backslashes get no special treatment
d5fa376
+            page=re.sub(r'(?i)"'+iurl,'"'+re.sub('\\\\ ', ' ', img_filename),page)
d5fa376
             
d5fa376
 
d5fa376
     # We substitute the CSS URLs of input_file with the *actual* URLs on the CHM2PDF_ORIG_DIR directory
d5fa376
@@ -459,6 +469,10 @@
d5fa376
             page=pf.read()
d5fa376
             pf.close()
d5fa376
     
d5fa376
+            # Some names contain '%20' (the URL percent-encoding of a space). We substitute it with a "real space"
d5fa376
+            # otherwise we won't be able to match to the real files.
d5fa376
+            page = re.sub('%20',' ',page)
d5fa376
+
d5fa376
             # Substitutions in 1st pass: we replace the original filenames with their corresponding "garbled" equivalents.
d5fa376
             for match_string in  match_strings:
d5fa376
                 replace_string = replace_garbled_strings[match_strings.index(match_string)]