|
|
d5fa376 |
--- chm2pdf.orig 2008-07-09 14:42:26.000000000 +0400
|
|
|
d5fa376 |
+++ chm2pdf 2013-05-18 16:16:48.097014228 +0400
|
|
|
d5fa376 |
@@ -115,8 +115,14 @@
|
|
|
d5fa376 |
urls_list=[]
|
|
|
d5fa376 |
for line in flist.readlines()[3:]:
|
|
|
d5fa376 |
#print 'line',line
|
|
|
d5fa376 |
- spline=line.split()
|
|
|
d5fa376 |
- urls_list.append(spline[5])
|
|
|
d5fa376 |
+ #This won't work if internal paths of the CHM contain spaces: e.g. "/doc space/" would be truncated to "/doc"
|
|
|
d5fa376 |
+ #spline=line.split()
|
|
|
d5fa376 |
+ #urls_list.append(spline[5])
|
|
|
d5fa376 |
+ #this should work better:
|
|
|
d5fa376 |
+ spline= re.sub(r".*?normal file\s*(.*?)\n$", "\\1", line)
|
|
|
d5fa376 |
+ if spline[0]=="/":
|
|
|
d5fa376 |
+ #print "got spline="+spline
|
|
|
d5fa376 |
+ urls_list.append( spline)
|
|
|
d5fa376 |
flist.close()
|
|
|
d5fa376 |
# os.remove(CHM2PDF_WORK_DIR+'/urlslist.txt')
|
|
|
d5fa376 |
|
|
|
d5fa376 |
@@ -148,13 +154,17 @@
|
|
|
d5fa376 |
|
|
|
d5fa376 |
img_filename = ''
|
|
|
d5fa376 |
for item in objective_urls:
|
|
|
d5fa376 |
- if iurl in item:
|
|
|
d5fa376 |
+ #objective_urls has "real path", whereas image_catcher.imgurls can contain %20!
|
|
|
d5fa376 |
+ #e.g. item='/doc space/image path/velocity space.gif', iurl='image%20path/velocity%20space.gif'
|
|
|
d5fa376 |
+ iiurl= re.sub('%20',' ',iurl)
|
|
|
d5fa376 |
+ if iiurl in item:
|
|
|
d5fa376 |
img_filename=CHM2PDF_ORIG_DIR+item
|
|
|
d5fa376 |
if ';' in img_filename: #hack to get rid of mysterious ; in filenames and urls...
|
|
|
d5fa376 |
img_filename=img_filename.split(';')[0]
|
|
|
d5fa376 |
# substitute the new image filenames - but only if an img_filename was found!
|
|
|
d5fa376 |
if img_filename:
|
|
|
d5fa376 |
- page=re.sub(iurl,img_filename,page)
|
|
|
d5fa376 |
+ #The r prefix marks a Python "raw string", in which backslashes get no special treatment
|
|
|
d5fa376 |
+ page=re.sub(r'(?i)"'+iurl,'"'+re.sub('\\\\ ', ' ', img_filename),page)
|
|
|
d5fa376 |
|
|
|
d5fa376 |
|
|
|
d5fa376 |
# We substitute the CSS URLs of input_file with the *actual* URLs on the CHM2PDF_ORIG_DIR directory
|
|
|
d5fa376 |
@@ -459,6 +469,10 @@
|
|
|
d5fa376 |
page=pf.read()
|
|
|
d5fa376 |
pf.close()
|
|
|
d5fa376 |
|
|
|
d5fa376 |
+ # Some names contain a '%20' (the URL percent-encoding of a space). We substitute with a "real space"
|
|
|
d5fa376 |
+ # otherwise we won't be able to match to the real files.
|
|
|
d5fa376 |
+ page = re.sub('%20',' ',page)
|
|
|
d5fa376 |
+
|
|
|
d5fa376 |
# Substitutions in 1st pass: we replace the original filenames with their corresponding "garbled" equivalents.
|
|
|
d5fa376 |
for match_string in match_strings:
|
|
|
d5fa376 |
replace_string = replace_garbled_strings[match_strings.index(match_string)]
|