User:Jrincayc/Patent utils
Python Code
This code may be used under either the license CC-BY-SA 3.0 or the GNU Lesser General Public License, version 2.1 or later.
= File patent_lib.py =
import re,urllib2,os,pickle,datetime,sys
# When CACHE_FILES is true, downloaded patent pages and the pickled
# term-extension dictionary are kept under ~/.patent_lib_cache.
CACHE_FILES = True

# Start from an empty dictionary so the name is always defined, whether or
# not caching is enabled or a saved file exists yet.
term_extension_dict = {}

if CACHE_FILES:
    cache_dir = os.path.join(os.path.expanduser('~'), ".patent_lib_cache")
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)
    term_extension_file = os.path.join(cache_dir, "term_extension")
    if os.path.exists(term_extension_file):
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # this is only safe because the file lives in the user's own cache
        # directory. The "r" mode is kept as in the original so it stays
        # consistent with whatever code writes this file (not visible here).
        with open(term_extension_file, "r") as term_extension_handle:
            term_extension_dict = pickle.load(term_extension_handle)
def get_patent_info(patent):
    """Fetch the page for *patent* and return its parsed form.

    The page lines come from download_patent_data(); they are passed,
    together with the original *patent* argument, to parse_patent_lines(),
    whose result is returned unchanged.
    """
    return parse_patent_lines(patent, download_patent_data(patent))
def download_patent_data(patent):
    """Return the lines of the USPTO full-text page for *patent*.

    The patent identifier is first passed through get_canonical_name()
    (defined elsewhere in this file). When CACHE_FILES is true, the page is
    read from ``cache_dir/us<patent>`` if that file exists; otherwise it is
    downloaded and then written to that file.

    The download takes two requests: the first response contains a line
    with a "REFRESH" redirect whose URL points at the actual patent page.

    Returns:
        A list of lines (as returned by ``readlines()``).

    Raises:
        IndexError: if the first response contains no "REFRESH" line.
        AttributeError: if the "REFRESH" line has no ``URL=...">`` part
            (``re.match`` returns None).
        urllib2.URLError: presumably, on network failure -- raised by
            urllib2.urlopen, not handled here.
    """
    patent = get_canonical_name(patent)
    if CACHE_FILES:
        # Re-check at call time in case the directory is missing.
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        patent_file = os.path.join(cache_dir, "us" + patent)
        if os.path.exists(patent_file):
            with open(patent_file) as cached:
                return cached.readlines()

    first = urllib2.urlopen("http://patft1.uspto.gov/netacgi/nph-Parser?patentnumber=" + patent)
    try:
        first_lines = first.readlines()
    finally:
        first.close()

    refresh_candidates = [line for line in first_lines if "REFRESH" in line]
    if not refresh_candidates:
        # Same exception type the bare [0] lookup used to raise, but with
        # enough context to tell which patent failed.
        raise IndexError("no REFRESH redirect line in USPTO response for patent " + patent)
    refresh_line = refresh_candidates[0]
    refresh_url = "http://patft1.uspto.gov" + re.match('.*?URL=(.*?)">', refresh_line).group(1)

    patent_connection = urllib2.urlopen(refresh_url)
    try:
        patent_lines = patent_connection.readlines()
    finally:
        patent_connection.close()

    if CACHE_FILES:
        with open(patent_file, "w") as cache_out:
            cache_out.writelines(patent_lines)
    return patent_lines
def parse_patent_lines(patent,patent_lines):
patent = get_canonical_name(patent)
ret_dict = {}
#print patent_lines
grant_index = patent_lines.index('
grant_date = patent_lines[grant_index].strip()
file_index = patent_lines.index('
file_line = patent_lines[file_index]
file_date = re.match(".*?(.*?)",file_line).group(1)
summary_index = grant_index+5
summary_end_index = patent_lines.index('
\n',summary_index)
summary = patent_lines[summary_index][16:].strip()
for i in range(summary_index+1,summary_end_index):
summary += " "+patent_lines[i].strip()
related_patents_header = '
\n'
patent_case_header = " if patent_case_header in patent_lines: patent_case_index = patent_lines.index(patent_case_header) patent_case_end = patent_lines.index(" \n",patent_case_index+2) patent_case_text = (" ".join([x.lstrip() for x in patent_lines[patent_case_index+2:patent_case_end]])).replace(" ret_dict["patent_case_text"] = patent_case_text if related_patents_header in patent_lines: related_index = patent_lines.index(related_patents_header) related_list = re.split(" | ||||||||||||
PCT Filed:\n'
if pct_file_header in patent_lines: pct_file_index = patent_lines.index(pct_file_header) pct_date = patent_lines[pct_file_index+2].strip() ret_dict["pct_file_date"] = pct_date ret_dict.update({"patent":patent,"file_date":file_date, "grant_date":grant_date,"summary":summary}) terminal_disclaimer_header = ' | ||||||||||||
[*] Notice: | \n'","").split(" | ")
class="wikitable sortable"
!Patent!!Filed!!Granted!!First File!!Expiration!!Summary!!Notes!!Company""" for line in lines: if line.startswith("**"): patent = eval(line[3:].strip()) expire_date,reason = patent_lib.get_patent_expiration(patent) expiration = to_wiki_date(expire_date) if patent.has_key("orig_patent"): filed = to_wiki_date_s(patent["orig_file_date"]) granted = to_wiki_date_s(patent["orig_grant_date"]) notes = "Reissue of "+patent["orig_patent"]+" filed "+to_wiki_date_s(patent["file_date"])+" granted "+to_wiki_date_s(patent["grant_date"]) else: filed = to_wiki_date_s(patent["file_date"]) granted = to_wiki_date_s(patent["grant_date"]) notes = "" first_date = to_wiki_date(patent_lib.get_first_date(patent)) notes += " [http://patft1.uspto.gov/netacgi/nph-Parser?patentnumber="+patent["patent"]+"]"+" "+reason if "patent_case_text" in patent: notes += " Case Text: "+patent["patent_case_text"].replace("\n","") print ' | |||||||
'
print '|',patent["patent"]," | ",filed," | ",granted," | ",first_date," | ",expiration," | ",patent["summary"]," | ",notes," | ",""+patent.get("company","Unknown")+""
#print patent, #if patent.has_key("orig_patent"): sys.stdout.flush() print ' |
\n |
= TODO =
Fix handling of Continuation patents. Example: http://patft1.uspto.gov/netacgi/nph-Parser?patentnumber=6,289,308
This patent's application date is Jun., 1990, so the expiration date should be June, 2010, instead of 08 mar 2020. More or less done. The information that is easy to parse is parsed.
Fix handling of Foreign patents. Example: http://patft1.uspto.gov/netacgi/nph-Parser?patentnumber=5455833 This patent has foreign patent priority data, so the 20-year term starts earlier. The expiration date should be October 3, 2012, not the 26 apr 2013 that the program reports. More or less done.
Ask USPTO to include Patent term adjustment data. Done. (No response yet, and it's been a month.) The data could perhaps be grabbed from Google instead: search http://www.google.com/patents?as_pnum=7020204 to get the id number, then fetch http://www.google.com/patents?id=PSR4AAAAEBAJ&output=text&pg=PA1 to get the text version (already OCRed) and see if there is a patent term adjustment.
PCT filed patents have the PCT filing date as filing date, not the US filing date. http://www.uspto.gov/web/offices/pac/mpep/documents/appxl_35_U_S_C_365.htm#usc35s365
The filing date for determining if there is a 17 year term is the PCT filing date or the last filing date. The filing date from the first continuation only determines the start of the 20 year term.
See: http://www.uspto.gov/web/offices/pac/mpep/documents/appxl_35_U_S_C_119.htm#usc35s119 http://www.uspto.gov/web/offices/pac/mpep/documents/appxl_35_U_S_C_154.htm#usc35s154 http://www.uspto.gov/web/offices/pac/mpep/documents/2700_2701.htm#sect2701
Test Data
= Short Test Data =
Comment
- US 4,849,812
- US 4,864,393
- US RE39,080
= MPEG-2 Patent List =
- MPEGLA patents [http://www.mpegla.com/m2/m2-patentlist.cfm]
- Alcatel-Lucent
- US 4,833,543
- US 4,970,590
- US 5,453,790
- AT&T Bell Laboratories
- US 5,136,377
- Bell Telephone Laboratories, Incorporated
- US 4,383,272
- British Telecommunications plc
- US 5,291,284
- Canon Inc.
- US 4,982,270
- CIF LICENSING, LLC
- US 5,068,724
- US 5,091,782
- US 5,093,720
- Columbia University
- US Re 35,093
- France Télécom (CNET)
- US 4,796,087
- Fujitsu
- US 5,235,618
- General Electric Capital Corporation
- US 4,706,260
- US 4,813,056
- General Instrument Corp. (now the broadband division of Motorola)
- US 4,394,774
- US 4,698,672
- GE Technology Development, Inc.
- US 5,426,464
- US 5,486,864
- US 5,491,516
- US 5,600,376
- US 5,796,743
- Hewlett-Packard Company
- US 5,867,501
- Hitachi, Ltd.
- Koninklijke Philips Electronics N.V.
- US 4,849,812
- US 4,901,075
- US 5,021,879
- US 5,027,206
- US 5,128,758
- US 5,179,442
- US 5,333,135
- US 5,606,539
- US 5,608,697
- US 5,699,476
- US 5,740,310
- US 5,844,867
- US 6,181,712
- US 6,792,001
- KDDI Corporation (KDDI)
- LG Electronics Inc.
- US Re 37,057
- US Re 37,568
- Matsushita now Panasonic Corporation
- US 5,113,255
- US Re 35,910
- US Re 36,015
- US Re 36,507
- US Re 39,276
- US Re 39,278
- US Re 39,280
- US 5,223,949
- US 5,412,430
- US 5,784,107
- Mitsubishi
- US 4,954,892
- US 5,072,295
- US 5,268,846
- US 5,949,489
- US 5,963,258
- US 5,970,175
- US 5,990,960
- US 6,002,439
- US 6,097,759
- US 6,188,794
- US 6,307,973
- US 7,362,805
- US 7,376,184
- US 7,756,202
- US 7,936,817
- Multimedia Patent Trust
- US 4,958,226
- US 5,227,878
- US 5,500,678
- US 5,563,593
- Nippon Telegraph and Telephone Corporation (NTT)
- NXP
- Philips
- US 4,849,812
- US 4,901,075
- US 5,021,879
- US 5,027,206
- US 5,128,758
- US 5,179,442
- US 5,333,135
- US 5,606,539
- US 5,608,697
- US 5,740,310
- US 5,844,867
- Robert Bosch GmbH
- Samsung
- US 5,461,421
- US 5,467,086
- US 5,654,706
- US 6,680,975
- US 7,292,657
- US 7,609,760
- US 7,616,687
- US 7,684,490
- US 7,724,821
- US 7,724,822
- US 7,724,823
- US 7,724,824
- US 7,724,828
- US 7,724,829
- US 7,742,522
- US 7,742,527
- US 7,764,735
- US 7,782,956
- US 7,787,538
- Sanyo Electric Co., Ltd.
- Scientific Atlanta
- US 5,418,782
- US 5,420,866
- US 5,457,701
- Sharp
- Sony
- US 4,864,393
- US Re 37,222
- US 5,191,436
- US 5,291,486
- US 5,298,991
- US 5,343,248
- US 5,428,396
- US 5,461,420
- US 5,481,553
- US 5,510,840
- US 5,539,466
- US 5,543,847
- US 5,559,557
- US 5,663,763
- US 5,666,461
- US 5,701,164
- US 5,946,042
- US 5,982,437
- US 6,040,863
- US 6,160,849
- US 7,627,041
- The Trustees of Columbia University in the City of New York
- US Re 35,093
- Thomson Licensing S.A.
- US 4,800,432
- US 4,969,055
- US 5,289,276
- US 5,365,272
- US 5,381,181
- US 5,422,676
- US 5,442,400
- US 5,459,789
- US 5,483,287
- US 5,565,923
- US 5,784,110
- US 7,020,204
- US 7,334,248
- Toshiba
- US 5,317,397
- US 5,424,779
- US 5,467,136
- US 5,742,344
- US 5,986,713
- Victor Company of Japan, Limited (JVC).
- US Re 34,965
- US Re 35,158
- US Re 36,822
- US 5,103,307
- US 5,175,618
- Alcatel-Lucent [http://www.tunequest.org/a-big-list-of-mp3-patents/20070226/]
- US 5,341,457
- US RE39,080
- Audio MPEG, Inc [http://www.audiompeg.com/us_patents.asp]
- US 4,972,484
- US 5,214,678
- US 5,323,396
- US 5,539,829
- US 5,606,618
- US 5,530,655
- US 5,777,992
- US 6,289,308
- US 5,481,643
- US 5,544,247
- US 5,610,985
- US 5,740,317
- US 5,878,080
- US 5,960,037
- US 5,991,715
- US 6,023,490
- Thomson [http://www.tunequest.org/a-big-list-of-mp3-patents/20070226/]
- US 4,821,260
- US 4,942,607
- US 5,214,742
- US 5,227,990
- US 5,384,811
- US 5,736,943
- US 5,455,833
- US 5,559,834
- US 5,321,729
- US 5,706,309
- US 5,701,346
- US 5,742,735
- US 5,812,672
- US 5,579,430
- US 6,185,539
- US 6,009,399
- US 5,924,060
- US 5,703,999
== Overrides ==
6792001 {'related_info': [{'application_number': '537701', 'patent_number': '6181712','filing_date': 'Feb. 23, 1995'}],'override_reason':'Auto parse missed date'}
7609760 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7616687 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7684490 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7724821 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7724822 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7724823 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7724824 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7742522 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7742527 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7764735 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
7787538 {'related_info': [{'filing_date': 'Mar. 1, 1993'}],'override_reason':'Autoparse missed application Ser. No. 08/024,305, filed Mar. 1, 1993.'}
5701164 {'related_info': [{'filing_date': 'Mar. 24, 1994'}],'override_reason':'Auto parse missed PCT date'}
5946042 {'related_info': [{'filing_date': 'Mar. 24, 1994'}],'override_reason':'Auto parse missed PCT date'}
6040863 {'related_info': [{'filing_date': 'Mar. 24, 1994'}],'override_reason':'Auto parse missed PCT date'}
7627041 {'related_info': [{'filing_date': 'Jan. 13, 1994'}],'override_reason':'Auto parse missed old filing date'}
7334248 {'related_info': [{'filing_date': 'Apr. 22, 1994'}],'override_reason':'Error in original patent, fixed in correction B2'}
4394774 {'terminal_disclaimer_date': 'December 15, 1998','override_reason': 'Referenced patent had its term changed'}
7627041 {'term_extension': 889, 'override_reason': 'Error in original patent, fixed in correction B2'}
= Philips Video CD US patent list =
- From http://www.ip.philips.com/services/?module=IpsLicenseProgram&command=View&id=52&part=4
- general philips and sony
- US 5068846
- mode 2 philips and sony
- US 4977550
- mei
- US 5113255
- US 5223949
- part philips
- US 5127858
- US 5179442
- US 5991715
- US 5323396
- US 5777992
- US 5539829
- US 7209565
- US 5745641
- US 5606539
- US 5844867
- US 5214678
- part sony
- US RE37222
- US 5191436
- US 5291486
- part jvc
- US RE34965
- US RE35158
= Script for MPEG-2 patents =
./patent_grab.py bare_mpeg2_patents | ./patent_re_orig_date.py | ./patent_add_company_info.py | ./patent_add_term_extension.py | ./patent_override.py mpeg2_overrides > grabbed_company_mpeg2_patent_info
./patent_to_wiki_table.py grabbed_re_mpeg2_patent_info > mpeg2_wiki_table