User:Tardis/regdiff.py
- !/usr/bin/env python
- regdiff.py
- Created November 9 2008
- Updated November 10 2008
- Version 0.3.1
- This program is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License version 2, the GNU Free
- Documentation License version 1.2 (with no Invariant Sections, with no
- Front-Cover Texts, and with no Back-Cover Texts), or (at your option) any
- later version of either license. It is distributed in the hope that it will
- be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
- Public License for more details.
- Description:
- Compares two Windows registry (.REG) files and produces a third such that
- applying it transforms a registry represented by the first into a registry
- represented by the second. Usually you should redirect the output to a file.
- Set displayProgress to a positive value n to report with a # every n lines.
- Warnings:
- The output deletes all keys and values present in the first but not in the
- second! Therefore, this should be run only on exports of complete subtrees.
- The input must be sorted (values sorted within each key); I believe that
- Regedit's export does this, and will also guarantee complete subtrees.
- It's probably wise to double check any key removals in the output: look for
- lines beginning with the two characters "[-". Modern .REG files use UTF-16,
- so some editors and tools (like grep) may have trouble with them. Using
- "grep '^.\[.-' out.reg" should work to at least detect removals.
- You can disable deletions of keys or values entirely by setting the
- appropriate variables (allow*Delete). If keys cannot be deleted but values
- can, each value named in the old file under a key that would be deleted will
- be deleted. (This is not as dangerous because reapplying the old file will
- restore them.)
- History:
- Version 0.1, November 9 2008:
- - initial release
- Version 0.2, November 9 2008:
- - use CRLF newlines
- - support deletion disabling
- - detect misorderings
- Version 0.3, November 9 2008:
- - make allowKeyDelete apply to the last keys to delete
- - verify continuous progress
- - support displaying progress
- Version 0.3.1, November 10 2008:
- - use case-insensitive and subkey-aware comparisons for ordering
- Bugs:
- Assumes that no key contains assignments to both @ and "" -- I think the
- latter is invalid anyway.
- I don't know whether .REG files are really UTF-16 or UCS-2.
- I'm not sure that the last blank line is really necessary; a trailing CRLF
- may be sufficient.
import sys,codecs
def keycompare(a, b):
    """Return an integer indicating the ordering of keys a and b.

    The comparison is case-insensitive and subkey-aware: each key is
    lowercased and split on backslashes, and the resulting component lists
    are compared, so a key always sorts immediately before its own subtree
    regardless of which characters follow it in sibling names.

    Returns -1 if a sorts before b, 0 if they are equal (ignoring case),
    and 1 if a sorts after b.
    """
    parts_a = a.lower().split('\\')
    parts_b = b.lower().split('\\')
    # Equivalent to Python 2's cmp(parts_a, parts_b), which no longer
    # exists in Python 3.
    return (parts_a > parts_b) - (parts_a < parts_b)
def _cmp(a, b):
    """Three-way comparison returning -1, 0 or 1 (Python 2's cmp)."""
    return (a > b) - (a < b)


class line(object):
    """One logical line of a .REG file: a key header, a value, or EOF.

    Attributes set by the constructor:
      old     -- False; the merge loop sets it once the line is consumed
      str     -- the raw text passed in (None at EOF)
      eof     -- True if this object represents end of file
      iskey   -- True for a "[key]" header line
      delete  -- True for "[-key]" headers and "name=-" assignments
      name    -- key path for keys, still-escaped value name for values
                 ("" for the default value @), None at EOF
      lastkey -- the key this line belongs to (itself, for a key line)
    """

    def __init__(self, s, k=None):
        """Parse s and make a line object.

        Inherit key from line k unless we are a key or it is omitted or None.
        Use k to detect misordered input if it is not None.
        Names are not unescaped, but escaping is considered in their extent.

        Raises IOError for malformed lines and ValueError when this line
        sorts before k.
        """
        self.old = False
        self.str = s
        self.eof = s is None
        self.iskey = not self.eof and len(s) > 1 and s[0] == '['
        index = 1  # points past end of name
        if self.eof:
            self.name = None
            self.delete = False
            self.lastkey = None
        elif self.iskey:
            self.delete = s[1] == '-'
            # Skip "[" (and "-" when deleting); the line ends in "]\r\n".
            self.lastkey = self.name = s[1 + self.delete:-3]
            if (k is not None and k.lastkey is not None
                    and keycompare(self.lastkey, k.lastkey) < 0):
                raise ValueError("key %r precedes %r in input"
                                 % (k.lastkey, self.lastkey))
        else:
            if s[0] == '"':
                # Scan for the closing quote, honouring backslash escapes.
                quote = False
                for c in s[1:]:
                    index += 1
                    if quote:
                        quote = False
                    elif c == '\\':
                        quote = True
                    elif c == '"':
                        break
                else:
                    raise IOError("unterminated name in " + repr(s))
            elif s[0] != '@':
                raise IOError("unrecognized format: " + repr(s))
            # The name for @ is "", which properly sorts before everything.
            self.name = s[1:index - 1]
            assign = s[index:].lstrip()
            if len(assign) < 2 or assign[0] != '=':
                raise IOError("no assignment in " + repr(s))
            self.delete = assign[1] == '-'
            if k is None:
                self.lastkey = None
            else:
                self.lastkey = k.lastkey
                # Values must be sorted within a key.  k.name is None only
                # when k is the EOF placeholder, which has nothing to compare.
                if (not k.iskey and k.name is not None
                        and self.name.lower() < k.name.lower()):
                    raise ValueError("value %r precedes %r in input"
                                     % (k.name, self.name))

    def valname(self):
        """Return the original form of this value's name."""
        if self.iskey:
            raise ValueError("this is not a value")
        return '"' + self.name + '"' if self.name else '@'

    def __str__(self):
        if self.eof:
            body = "EOF"
        elif self.iskey:
            body = repr(self.str) + " (key)"
        else:
            body = repr(self.str) + " in key " + repr(self.lastkey)
        return (self.__class__.__name__
                + '[' + body + ": " + repr(self.name) + ']')


class keyprint(object):
    """Callable that writes a "[key]" header only when the key changes."""

    def __init__(self, o):
        self.key = None  # last key written, if any
        self.out = o

    def __call__(self, k):
        if k != self.key:
            self.key = k
            self.out.write("\r\n[" + k + "]\r\n")


def terminated(s):
    """Return true if the string contains an even number of unquoted \"s
    and does not end in an unescaped backslash."""
    quote = False  # previous character was an unescaped backslash
    ret = True     # an even number of unescaped quotes seen so far
    for c in s:
        if quote:
            quote = False
        elif c == '\\':
            quote = True
        elif c == '"':
            ret = not ret
    return ret and not quote


def nextLogical(f):
    """Return the next logical line from a file object.

    Physical lines are joined while the text so far is not terminated()
    (inside a string, or ending in a continuation backslash).  Line endings
    are normalized to CRLF.  Never returns a null string.  Return None at
    EOF; raise IOError if the file ends in the middle of a logical line.
    """
    ret = ""
    done = False
    while not done:
        l = f.readline()
        if l == "":
            if ret == "":
                return None
            raise IOError("file ends with escape or in string")
        c = l.rstrip('\r\n')
        if c == "" and ret == "":
            continue  # skip initial blank lines
        ret += c
        done = terminated(ret)
        if c != l:
            # The physical line had a line ending; re-add it as CRLF.
            ret += "\r\n"
    return ret


def isunder(s, r):
    """Return true if the key s is in the tree rooted at r.

    The comparison ignores case, matching keycompare's ordering; otherwise
    a subtree spelled with different capitalization in the new file would
    not be recognized and its parent would be deleted.
    """
    s = s.lower()
    r = r.lower()
    return s == r or s.startswith(r + '\\')


# Set either of these to False to suppress the corresponding deletions.
allowKeyDelete = True
allowValueDelete = True
# Set to a positive value n to print a # to stderr every n iterations.
displayProgress = 0


def _diff(fo, fn, out):
    """Merge-compare decoded streams fo (old) and fn (new), writing to out.

    Both inputs must be sorted.  Writes a .REG file that turns the old
    registry into the new one.  Raises IOError on malformed input and
    ValueError on misordered input.
    """
    kp = keyprint(out)
    head = fo.readline()
    if fn.readline() != head:
        raise IOError("different file headers")
    out.write(head.rstrip('\r\n') + "\r\n")
    # o and n start as the same consumed EOF placeholder so that both files
    # are read on the first iteration.
    o = n = line(None)
    o.old = True
    killing = False  # the tree being deleted, if any
    iters = 0
    while True:
        iters += 1
        if displayProgress and iters % displayProgress == 0:
            sys.stderr.write('#')
        if o.old:
            o = line(nextLogical(fo), o)
        if n.old:
            n = line(nextLogical(fn), n)
        if o.eof and n.eof:
            break
        if o.delete or n.delete:
            raise IOError("input contains deletion requests")
        for cur in (o, n):
            if not cur.eof and cur.lastkey is None:
                raise IOError("value %s appears before any key"
                              % cur.valname())
        # Determine which line logically comes first; all keys come after all
        # values (since the values go with a previous key), and EOF comes
        # after everything.  Positive values mean that n comes first.
        c = (o.eof - n.eof or keycompare(o.lastkey, n.lastkey)
             or o.iskey - n.iskey or _cmp(o.name.lower(), n.name.lower()))
        o.old = c <= 0
        n.old = c >= 0
        assert o.old or n.old, "not advancing in the file"
        if killing and (o.eof or not isunder(o.lastkey, killing)):
            killing = False
        if not killing:
            if c < 0:
                if o.iskey:
                    # Delete a whole key if the new file is past all its
                    # subkeys.  Note that n.lastkey!=o.name, because n must
                    # be a key.
                    if ((n.eof or not isunder(n.lastkey, o.name))
                            and allowKeyDelete):
                        killing = o.name
                        out.write("\r\n[-" + o.name + "]\r\n")
                elif allowValueDelete:
                    kp(o.lastkey)
                    out.write(o.valname() + "=-\r\n")
            elif not n.iskey and n.str != o.str:
                kp(n.lastkey)
                out.write(n.str)
    out.write("\r\n")
    if displayProgress:
        sys.stderr.write('\n')


def main(argv=None):
    """Command-line entry point: regdiff.py old.reg new.reg > out.reg"""
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        sys.stderr.write("usage: " + argv[0] + " old.reg new.reg\n")
        sys.exit(2)  # BAD_ARGS
    ci = codecs.lookup("utf_16")
    oldfile = open(argv[1], 'rb')
    try:
        newfile = open(argv[2], 'rb')
        try:
            # The codec writer emits bytes; on Python 3 those must go to the
            # underlying binary buffer rather than the text-mode sys.stdout.
            out = ci.streamwriter(getattr(sys.stdout, 'buffer', sys.stdout))
            _diff(ci.streamreader(oldfile), ci.streamreader(newfile), out)
        finally:
            newfile.close()
    finally:
        oldfile.close()


if __name__ == "__main__":
    main()