# User:User A1/svgTinker.py

#!/usr/bin/python

from BeautifulSoup import BeautifulStoneSoup, Tag

import sys

import re

# Split every Inkscape style="..." attribute into separate, known attributes on that tag

def splitInkscapeStyle(tag) :
    """Explode a tag's CSS-like ``style`` attribute into separate attributes.

    Every ``name:value`` declaration found in the ``style`` string becomes
    an attribute of its own (``tag[name] = value``); the combined ``style``
    attribute is then removed from the tag.

    tag -- attribute container supporting ``get``, item assignment and item
           deletion (a BeautifulSoup tag in this script).

    Returns False when the tag has no (or an empty) ``style`` attribute,
    otherwise None.
    """
    strStyle = tag.get("style")
    if not strStyle:
        return False

    print("style is" + strStyle)

    declarations = []
    for decl in strStyle.split(";"):
        print("I is : " + decl)
        # Split on the FIRST colon only: values such as url(http://...)
        # legitimately contain further colons.
        name, sep, value = decl.partition(":")
        name = name.strip()
        if not sep or not name:
            # Empty segment (e.g. after a trailing ";") or a declaration
            # without a colon: nothing usable, skip it instead of crashing.
            continue
        declarations.append((name, value.strip()))

    # Drop the combined attribute before writing the individual ones so the
    # deletion can never clobber a freshly written attribute.
    del tag["style"]
    for name, value in declarations:
        tag[name] = value

# By Peter Waller, "BS: Replacing a tag with its contents", BeautifulSoup mailing list

def tagRemove(tag, tagname):
    """Replace ``tag`` by its own children, preserving their order.

    The children of ``tag`` are moved into ``tag.parent`` at the position
    ``tag`` occupied, after which the (now empty) ``tag`` is detached.

    tag     -- node exposing ``parent``, ``contents`` and ``extract()``;
               its parent must expose ``contents`` and ``insert()``.
    tagname -- unused; kept so existing callers keep working.

    Does nothing when ``tag`` has no parent.
    """
    parent = tag.parent
    if parent is None:
        return

    # Locate the tag by identity. list.index() compares with ==, which
    # (presumably, for BeautifulSoup tags) compares by value and could pick
    # a different but identical-looking sibling.
    origIndex = None
    for pos, sibling in enumerate(parent.contents):
        if sibling is tag:
            origIndex = pos
            break
    if origIndex is None:
        return

    # Iterate over a snapshot: inserting a child into the parent detaches it
    # from tag.contents, so walking the live list would skip every other
    # child.
    for offset, content in enumerate(list(tag.contents)):
        parent.insert(origIndex + offset, content)

    # Excise the now empty tag
    tag.extract()

def epsilon():
    """Return the floating point machine epsilon.

    This is the smallest power of two ``eps`` for which ``1.0 + eps`` is
    still distinguishable from ``1.0``.
    """
    eps = 1.0
    # Halve with true division (floor division would collapse 1.0 straight
    # to 0.0) and stop while eps itself is still visible next to 1.0.
    while 1.0 + eps / 2 > 1.0:
        eps /= 2
    return eps

def hasFontFace(tag):
    """Tell whether the tag's text contains an ``@font-face`` rule.

    tag -- object whose ``string`` attribute holds its text (or None).

    Returns True when ``@font-face`` occurs anywhere in ``tag.string``,
    False otherwise (including when the tag has no text).
    """
    text = tag.string
    if not text:
        return False

    # Check for encoded font base64.
    # A membership test is used rather than str.find(): find() yields -1
    # (truthy) when the marker is absent and 0 (falsy) when it is at the
    # very start, which inverts the answer in both cases.
    return "@font-face" in text

# Takes a stone-soup tag and applies various
# workaround fixes of dubious effectiveness

def fontFix(tag):
    """Move bold/italic markers out of ``font-family`` and fix DejaVu naming.

    Looks at the tag's ``font-family`` attribute and

    * if it contains ``-bold``: makes sure ``font-weight`` mentions bold and
      strips ``-Bold`` from the family name;
    * if it contains ``-italic``: makes sure ``font-style`` mentions italic
      and strips ``-Italic`` from the family name;
    * if it starts with (an optionally quoted) ``dejavusans``: replaces the
      family name with ``DejaVu Sans``.

    All matching is case-insensitive. A tag without ``font-family``, or
    whose family needs none of these fixes, is left untouched.

    tag -- attribute container supporting ``get`` and item assignment
           (a BeautifulSoup tag in this script).

    Returns None.
    """
    family = tag.get("font-family")
    if not family:
        return

    # Check the font types
    bold = re.search(r"(?i)-bold", family) is not None
    italic = re.search(r"(?i)-italic", family) is not None
    dejavu = re.match(r"(?i)'?dejavusans", family) is not None

    # if none of the above apply we can skip
    if not (bold or italic or dejavu):
        return

    problems = ""
    if bold:
        problems += "bad bolding method "
    if italic:
        problems += "bad italicising method "
    if dejavu:
        problems += "wrong font name"

    print("Fixing tag : " + problems)
    print(tag)

    # Check for bold
    if bold:
        weight = tag.get("font-weight")
        if not weight:
            tag["font-weight"] = "Bold"
        elif not re.search(r"(?i)bold", weight):
            # NOTE(review): appending ";Bold" to an existing value is kept
            # from the original; confirm that consumers accept a value such
            # as "normal;Bold" for this attribute.
            tag["font-weight"] = weight + ";Bold"
        family = re.sub(r"(?i)-Bold", "", family)

    # Check for italics
    if italic:
        style = tag.get("font-style")
        if not style:
            tag["font-style"] = "Italic"
        elif not re.search(r"(?i)italic", style):
            # NOTE(review): same caveat as for ";Bold" above.
            tag["font-style"] = style + ";Italic"
        family = re.sub(r"(?i)-Italic", "", family)

    # Fix dejavu vs Deja Vu
    if dejavu:
        family = "DejaVu Sans"

    if bold or italic or dejavu:
        tag["font-family"] = family

# Check to see if a small font is being used in conjunction with a scaling transform

def fontSizeFix(tag):
    """Fold an enlarging ``transform`` scale into the tag's ``font-size``.

    When ``tag`` carries a ``transform`` that is a single ``matrix(...)`` or
    ``scale(...)`` with no rotation/skew terms and a scale factor above 1,
    the font size in effect (taken from the tag itself or its nearest
    ancestor that has ``font-size``) is multiplied by that factor and the
    scale entries of the transform are divided by it. The transform is
    always rewritten in ``matrix(a b c d e f)`` form.

    The enlarged size is written onto ``tag`` itself, never onto the
    ancestor it was read from, so other descendants of that ancestor keep
    their size and repeated calls cannot compound.

    tag -- node supporting ``get``, item assignment and a ``parent``
           attribute (None at the root).

    Returns True when the tag was modified, False when nothing could or
    needed to be done (no transform, no font size found, unsupported or
    malformed transform, non-diagonal matrix, factor not above 1,
    unparseable font size).
    """
    # Without a transformation there is nothing we can do. The attribute is
    # looked up with get(): on BeautifulSoup 3 tags the ``in`` operator
    # tests child contents, not attribute names.
    transform = tag.get("transform")
    if not transform:
        return False

    # Find the nearest tag (this one or an ancestor) with a font-size
    sizeOwner = tag
    while sizeOwner is not None and sizeOwner.get("font-size") is None:
        sizeOwner = sizeOwner.parent
    if sizeOwner is None:
        return False

    # Only a lone matrix(...) or scale(...) is understood; anything else
    # (translate, rotate, chained transforms) is left alone rather than
    # being overwritten.
    parsed = re.match(r"(?i)\s*(matrix|scale)\s*\(([^)]*)\)\s*$", transform)
    if parsed is None:
        return False

    try:
        values = [float(v)
                  for v in re.split(r"[\s,]+", parsed.group(2).strip())
                  if v]
    except ValueError:
        return False

    if parsed.group(1).lower() == "scale":
        if len(values) == 1:
            # scale(s) is shorthand for a uniform scale(s, s)
            values = values * 2
        if len(values) != 2:
            return False
        # construct m as a list in Mx+b form
        m = [values[0], 0.0, 0.0, values[1], 0.0, 0.0]
    else:
        if len(values) != 6:
            return False
        m = values

    print(m)

    EPSILON = 0.001
    if abs(m[1]) >= EPSILON or abs(m[2]) >= EPSILON:
        # Rotation/skew present: M is not diagonal, nothing safe to do
        return False

    # OK, so M is a diagonal matrix
    print("so far so good")

    if abs(m[0]) > abs(m[3]):
        factor = m[0]
    else:
        factor = m[3]

    if factor <= 1:
        return False

    sizeText = sizeOwner.get("font-size").strip()
    if sizeText.lower().endswith("px"):
        sizeText = sizeText[:-2]
    try:
        fontSize = float(sizeText)
    except ValueError:
        # Units other than px (pt, em, %) cannot be rescaled blindly
        return False

    # Pump up the font size by factor, then reduce the matrix.
    # NOTE(review): the translation terms are left unchanged, as in the
    # original; confirm that positions given via x/y attributes still end
    # up where intended once the scale is reduced.
    tag["font-size"] = str(fontSize * factor)
    m[0] = m[0] / factor
    m[3] = m[3] / factor
    tag["transform"] = "matrix(" + " ".join([str(v) for v in m]) + ")"
    return True

# Crappy font substitution routine

def fontSub(tag):
    """Replace the tag's ``font-family`` by a preferred substitute.

    The family name is tested (case-insensitively, from its start, with an
    optional leading quote) against a small table; the first matching entry
    wins and its replacement is written to ``font-family``. Tags without
    ``font-family`` or with an unlisted family are left unchanged.

    tag -- attribute container supporting ``get`` and item assignment
           (a BeautifulSoup tag in this script).

    Returns None.
    """
    # (pattern, replacement) pairs, tried in order
    preferredFont = (
        (re.compile("(?i)'?Arial.*"), "DejaVu Sans"),
        (re.compile("(?i)'?Times new roman.*"), "Times"),
    )

    family = tag.get("font-family")
    if family is None:
        return

    # Substitute fonts from our preferred font table
    for pattern, replacement in preferredFont:
        if pattern.match(family):
            tag["font-family"] = replacement
            break

def main():
    """Command line entry point: ``svgTinker.py inputFile outputFile``.

    Reads the SVG in ``inputFile``, then
      1. splits every ``style`` attribute into individual attributes,
      2. applies the font fixes to ``text`` and ``g`` elements,
      3. replaces every ``tspan`` by its children,
      4. replaces ``style`` elements containing ``@font-face`` by an empty
         ``g`` element,
    and writes the result to ``outputFile``.

    Exits with status 1 on a wrong argument count or when either file
    cannot be opened.
    """
    if len(sys.argv) != 3:
        print("Usage: svgTinker.py inputFile outputFile")
        sys.exit(1)

    # open() raises on failure instead of returning a false value, so the
    # error has to be caught rather than tested for.
    try:
        f = open(sys.argv[1])
    except IOError:
        print("File does not exist or could not be read")
        sys.exit(1)
    try:
        xmlText = f.read()
    finally:
        f.close()

    soup = BeautifulStoneSoup(xmlText)

    def hasAttr(element, prefix):
        # True when any attribute name starts with prefix, ignoring case
        for name, _ in (element.attrs or []):
            if name.lower().startswith(prefix):
                return True
        return False

    # find all style="..." tags
    for styled in soup.findAll(style=True):
        splitInkscapeStyle(styled)

    # Correct all font tags. The checks are evaluated afresh for every
    # element so that no result leaks over from a previous one.
    for text in soup.findAll("text"):
        if hasAttr(text, "font-family"):
            fontFix(text)
            fontSub(text)
            # NOTE(review): as in the original, elements with a font-family
            # never reach fontSizeFix - confirm that this is intended.
            continue
        if hasAttr(text, "transform"):
            fontSizeFix(text)

    # Fonts can also be stored in g elements.
    for group in soup.findAll("g"):
        if hasAttr(group, "font-family"):
            fontFix(group)
            fontSub(group)

    # Nuke the tspans, preserving children
    for span in soup.findAll("tspan"):
        tagRemove(span, "tspans")

    # Find base64 encoded data and destroy it
    # FIXME: Not sure how to trick soup into inserting "" vs "<>", so use
    # an empty g element instead
    for styleTag in soup.findAll("style"):
        if hasFontFace(styleTag):
            styleTag.replaceWith(Tag(soup, "g"))

    try:
        out = open(sys.argv[2], 'w')
    except IOError:
        print('Unable to open file for writing. aborting')
        sys.exit(1)

    # save modified svg data
    # The original called soup.prettify() here and discarded its result, so
    # the unprettified document is what has always been written; that is
    # kept unchanged.
    try:
        out.write(str(soup))
    finally:
        out.close()

    print("Wrote file : " + sys.argv[2])

# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()