User:Tardis/regdiff.py

#!/usr/bin/env python
# regdiff.py
# Created November 9 2008
# Updated November 10 2008
# Version 0.3.1
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License version 2, the GNU Free
# Documentation License version 1.2 (with no Invariant Sections, with no
# Front-Cover Texts, and with no Back-Cover Texts), or (at your option) any
# later version of either license. It is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
# Description:
# Compares two Windows registry (.REG) files and produces a third such that
# applying it transforms a registry represented by the first into a registry
# represented by the second. Usually you should redirect the output to a file.
# Set displayProgress to a positive value n to report with a # every n lines.
# Warnings:
# The output deletes all keys and values present in the first but not in the
# second! Therefore, this should be run only on exports of complete subtrees.
# The input must be sorted (values sorted within each key); I believe that
# Regedit's export does this, and will also guarantee complete subtrees.
# It's probably wise to double check any key removals in the output: look for
# lines beginning with the two characters "[-". Modern .REG files use UTF-16,
# so some editors and tools (like grep) may have trouble with them. Using
# "grep '^.\[.-' out.reg" should work to at least detect removals.
# You can disable deletions of keys or values entirely by setting the
# appropriate variables (allow*Delete). If keys cannot be deleted but values
# can, each value named in the old file under a key that would be deleted will
# be deleted. (This is not as dangerous because reapplying the old file will
# restore them.)
# History:
# Version 0.1, November 9 2008:
#   - initial release
# Version 0.2, November 9 2008:
#   - use CRLF newlines
#   - support deletion disabling
#   - detect misorderings
# Version 0.3, November 9 2008:
#   - make allowKeyDelete apply to the last keys to delete
#   - verify continuous progress
#   - support displaying progress
# Version 0.3.1, November 10 2008:
#   - use case-insensitive and subkey-aware comparisons for ordering
# Bugs:
# Assumes that no key contains assignments to @ and to "" -- I think the latter
# is invalid anyway.
# I don't know whether .REG files are really UTF-16 or UCS-2.
# I'm not sure that the last blank line is really necessary; a trailing CRLF
# may be sufficient.

import sys,codecs

def keycompare(a, b):
    """Return an integer indicating the ordering of keys a and b.

    Both keys are lower-cased and split on backslashes, and the resulting
    component lists are compared.  Comparing lists rather than raw strings
    makes a key sort immediately before its own subkeys, regardless of how
    the backslash collates against other characters.

    Returns a negative number if a sorts before b, zero if they are equal
    (ignoring case), and a positive number if a sorts after b.
    """
    x = a.lower().split('\\')
    y = b.lower().split('\\')
    # Same result as cmp(x, y), but does not need the cmp builtin, which
    # only exists on Python 2.
    return (x > y) - (x < y)

class line(object):
    """One logical line of a .REG file: a key header, a value, or EOF.

    Attributes set by the constructor:
      old     -- always False initially; callers set it to request a refill
      str     -- the raw text passed in (None for EOF)
      eof     -- True if this object stands for end of file
      iskey   -- True if the text is a "[...]" key header
      delete  -- True for "[-key]" headers and for "name=-" assignments
      name    -- key path (for keys), value name without its quotes (for
                 values, "" for the default value @), or None at EOF
      lastkey -- the key this line belongs to (its own name for a key)
    """

    def __init__(self, s, k=None):
        """Parse s and make a line object.

        s is the text of one logical line, or None to represent EOF.

        Inherit key from line k unless we are a key or it is omitted or None.
        Use k to detect misordered input if it is not None.
        Names are not unescaped, but escaping is considered in their extent.

        Raises IOError if s is not a recognizable key or assignment, and
        ValueError if s sorts before the preceding line k.
        """
        self.old = False
        self.str = s
        self.eof = s is None
        self.iskey = not self.eof and len(s) > 1 and s[0] == '['
        index = 1  # points past end of name
        if self.eof:
            self.name = None
            self.delete = False
            self.lastkey = None
        elif self.iskey:
            self.delete = s[1] == '-'
            # Strip the line ending explicitly instead of assuming that the
            # text ends in exactly "]\r\n": the last line of a file may lack
            # its newline.
            body = s.rstrip('\r\n')
            if body[-1] != ']':
                raise IOError("unrecognized format: " + repr(s))
            # self.delete is a bool; adding it skips the '-' when present.
            self.lastkey = self.name = body[1 + self.delete:-1]
            if (k is not None and k.lastkey is not None and
                    keycompare(self.lastkey, k.lastkey) < 0):
                raise ValueError("key %r precedes %r in input" %
                                 (k.lastkey, self.lastkey))
        else:
            if s[0] == '"':
                # Scan for the closing quote, skipping any character that
                # follows a backslash.
                quote = False
                for c in s[1:]:
                    index += 1
                    if quote:
                        quote = False
                    elif c == '\\':
                        quote = True
                    elif c == '"':
                        break
                else:
                    raise IOError("unterminated name in " + repr(s))
            elif s[0] != '@':
                raise IOError("unrecognized format: " + repr(s))
            # The name for @ is "", which properly sorts before everything.
            self.name = s[1:index - 1]
            assign = s[index:].lstrip()
            if len(assign) < 2 or assign[0] != '=':
                raise IOError("no assignment in " + repr(s))
            self.delete = assign[1] == '-'
            if k is None:
                self.lastkey = None
            else:
                self.lastkey = k.lastkey
                # Only a preceding value (not a key header and not the EOF
                # placeholder, whose name is None) constrains the ordering.
                if (not k.eof and not k.iskey and
                        self.name.lower() < k.name.lower()):
                    raise ValueError("value %r precedes %r in input" %
                                     (k.name, self.name))

    def valname(self):
        """Return the original form of this value's name.

        That is the quoted name, or @ for the default (empty-named) value.
        Raises ValueError if this line is a key.
        """
        if self.iskey:
            raise ValueError("this is not a value")
        return '"' + self.name + '"' if self.name else '@'

    def __str__(self):
        """Return a debugging description of this line."""
        if self.eof:
            what = "EOF"
        elif self.iskey:
            what = repr(self.str) + " (key)"
        else:
            what = repr(self.str) + " in key " + repr(self.lastkey)
        return (self.__class__.__name__ +
                '[' + what + ": " + repr(self.name) + ']')

class keyprint(object):
    """Callable that emits a key header only when the key changes.

    The wrapped stream receives a blank line followed by "[key]" the first
    time a key is passed in; repeated calls with the same key write nothing.
    """

    def __init__(self, o):
        """Remember the output stream o; no key has been printed yet."""
        self.out = o
        self.key = None

    def __call__(self, k):
        """Write the header for key k unless it was the last one written."""
        if k == self.key:
            return
        self.key = k
        header = "\r\n[" + k
        self.out.write(header + "]\r\n")

def terminated(s):
    """Tell whether s forms a complete logical line.

    Return true if s holds an even number of double quotes that are not
    preceded by an escaping backslash, and s does not itself end in an
    unescaped backslash.
    """
    escaped = False   # the previous character was an unescaped backslash
    balanced = True   # an even number of unescaped quotes seen so far
    for ch in s:
        if escaped:
            # Whatever follows a backslash is taken literally.
            escaped = False
            continue
        if ch == '\\':
            escaped = True
        elif ch == '"':
            balanced = not balanced
    return balanced and not escaped

def nextLogical(f):
    """Return the next logical line from a file object.

    A logical line is one or more physical lines, read until terminated()
    accepts the accumulated text (no open quoted string and no trailing
    backslash).  Blank lines before the logical line are skipped.

    The physical lines of a continued entry are joined with CRLF so that the
    returned text keeps its line structure: a backslash continuation stays
    at the end of its line and a line break inside a quoted string is not
    lost.  The result ends in CRLF if the last physical line read had a
    line ending.

    Never returns a null string.
    Return None at EOF.
    Raises IOError if the file ends in the middle of a logical line.
    """
    pieces = []   # physical lines read so far, without their line endings
    joined = ""   # the same text with no separators, as fed to terminated()
    while True:
        raw = f.readline()
        if raw == "":
            if not pieces:
                return None
            raise IOError("file ends with escape or in string")
        text = raw.rstrip('\r\n')
        if text == "" and not pieces:
            continue  # skip initial blank lines
        pieces.append(text)
        # Decide completeness on the text without separators, so that an
        # inserted CR is never mistaken for the character escaped by a
        # trailing backslash.
        joined += text
        if terminated(joined):
            break
    ret = "\r\n".join(pieces)
    if text != raw:
        ret += "\r\n"
    return ret

def isunder(s, r):
    """Return True if the key s is in the tree rooted at r.

    A key is in the tree if it is r itself or if its path starts with r
    followed by a backslash.  The test ignores case, in agreement with
    keycompare(), because registry key names are case-insensitive.
    """
    s = s.lower()
    r = r.lower()
    return s == r or s.startswith(r + '\\')

# ---- Script body: merge-walk the two sorted inputs and emit the diff ----

if len(sys.argv) != 3:
    sys.stderr.write("usage: " + sys.argv[0] + " old.reg new.reg\n")
    sys.exit(2)  # BAD_ARGS

# Configuration; see the header comments.
allowKeyDelete = True
allowValueDelete = True
displayProgress = 0

ci = codecs.lookup("utf_16")
fo = ci.streamreader(open(sys.argv[1], 'rb'))  # old file
fn = ci.streamreader(open(sys.argv[2], 'rb'))  # new file
# The stream writer produces encoded bytes, so write to the underlying byte
# stream where stdout has one (Python 3) and to stdout itself otherwise.
# NOTE(review): on Windows with Python 2, stdout is presumably in text mode
# and would alter the encoded newlines -- confirm, and switch it to binary
# mode if so.
out = ci.streamwriter(getattr(sys.stdout, "buffer", sys.stdout))
kp = keyprint(out)

# Both files must start with the same header line, which is copied through.
head = fo.readline()
if fn.readline() != head:
    raise IOError("different file headers")
out.write(head.rstrip('\r\n') + "\r\n")

# o and n are the current lines of the old and new file.  They start as one
# shared EOF placeholder marked old, so both get read on the first pass.
o = n = line(None)
o.old = True
killing = False  # the tree being deleted, if any
iters = 0
while True:
    iters += 1
    if displayProgress and iters % displayProgress == 0:
        sys.stderr.write('#')
    # Refill whichever side was consumed by the previous iteration.
    if o.old:
        o = line(nextLogical(fo), o)
    if n.old:
        n = line(nextLogical(fn), n)
    if o.eof and n.eof:
        break
    if o.delete or n.delete:
        raise IOError("input contains deletion requests")
    # Determine which line logically comes first; all keys come after all
    # values (since the values go with a previous key), and EOF comes after
    # everything.  Positive values mean that n comes first.
    c = (o.eof - n.eof or keycompare(o.lastkey, n.lastkey) or
         o.iskey - n.iskey)
    if not c:
        # Same key and same kind of line: order by name, ignoring case.
        oname = o.name.lower()
        nname = n.name.lower()
        c = (oname > nname) - (oname < nname)
    o.old = c <= 0
    n.old = c >= 0
    assert o.old or n.old, "not advancing in the file"
    # Stop suppressing output once the old file has left the deleted tree.
    if killing and (o.eof or not isunder(o.lastkey, killing)):
        killing = False
    if not killing:
        if c < 0:
            # The old line has no counterpart in the new file.
            if o.iskey:
                # Delete a whole key if the new file is past all its subkeys.
                # Note that n.lastkey!=o.name, because n must be a key.
                if ((n.eof or not isunder(n.lastkey, o.name)) and
                        allowKeyDelete):
                    killing = o.name
                    out.write("\r\n[-" + o.name + "]\r\n")
            elif allowValueDelete:
                kp(o.lastkey)
                out.write(o.valname() + "=-\r\n")
        elif n.iskey:
            # A key present only in the new file: emit its header so that
            # the key is created even if it holds no values.
            if c > 0:
                kp(n.lastkey)
        elif n.str != o.str:
            # A new value, or an existing value whose text changed.
            kp(n.lastkey)
            out.write(n.str)

# Finish the output with a blank line.
out.write("\r\n")
if displayProgress:
    sys.stderr.write('\n')