#!/usr/bin/env python
# $Id: refcounter.py 3631 2007-06-03 05:08:45Z fitz $
#
# Copyright (c) 2007, Michael P. Fitzgerald (mpfitz@berkeley.edu)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The name of Michael P. Fitzgerald may not be used to endorse
#       or promote products derived from this software without specific
#       prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY MICHAEL P. FITZGERALD ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL MICHAEL P. FITZGERALD BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# This script is used to count the references in a given LaTeX file.
#
"""Count how often each reference code is cited in a LaTeX file.

The file is scanned line by line for ``\\cite``, ``\\citet``, ``\\citep``,
``\\citetalias`` and ``\\citepalias`` commands (optionally starred and with
up to two ``[...]`` note arguments) and for ``\\defcitealias`` definitions.
A table of citation counts, most cited first, is printed to standard output.

Limitations: a command must fit on a single line to be recognised, and
other citation commands (``\\citealt``, ``\\citeauthor``, ...) are ignored.

The code deliberately sticks to constructs that behave identically on
Python 2.7 and Python 3 (e.g. ``print`` is always called with exactly one
argument).
"""

import os
import re
import sys
from collections import Counter

USAGE = """refcounter.py $Rev: 3631 $
Copyright (c) 2007, Michael P. Fitzgerald (fitz@astro.berkeley.edu)
All rights reserved.

Counts the number of citations for each reference code appearing in a LaTeX file.

Usage:
  refcounter.py <file>

Required arguments:
  file    filename of LaTeX manuscript
"""

# regular expression for citation commands
R_CITE = re.compile(
    r"\\cite(?:|t|p|talias|palias)"  # command name
    r"\*?"                           # optional star (full author list)
    r"(?:\[[^\]]*\]){0,2}"           # up to two optional [note] arguments
    r"\{([^}]*)\}"                   # comma-separated reference codes
)

# regular expression for citation alias definitions
R_ALIAS = re.compile(r"\\defcitealias\{([^}]*)\}\{([^}]*)\}")

# A '%' starts a comment unless it is escaped, i.e. preceded by an odd number
# of backslashes.  Group 1 captures the (even-length) backslash run directly
# in front of the '%' so that it can be put back after the comment is cut.
R_COMMENT = re.compile(r"(?<!\\)((?:\\\\)*)%.*")


def strip_comment(line):
    """Return *line* with its LaTeX comment, if any, removed.

    Everything from the first unescaped ``%`` up to (but not including) the
    line terminator is dropped; ``\\%`` is left untouched.
    """
    return R_COMMENT.sub(r"\1", line)


def scan_references(lines):
    """Collect citation counts and alias definitions from LaTeX source lines.

    Parameters:
        lines -- iterable of strings, one per source line (an open text
                 file works).

    Returns:
        A ``(counts, aliases)`` tuple.  ``counts`` is a ``Counter`` mapping
        each reference code to the number of times it is cited; ``aliases``
        is a dict mapping a reference code to the alias text given in its
        last ``\\defcitealias`` definition.
    """
    counts = Counter()
    aliases = {}
    for line in lines:
        line = strip_comment(line)
        for match in R_CITE.finditer(line):
            for code in match.group(1).split(','):
                # LaTeX ignores blanks around the codes; skip empty entries
                # such as the one produced by a trailing comma.
                code = code.strip()
                if code:
                    counts[code] += 1
        for match in R_ALIAS.finditer(line):
            aliases[match.group(1).strip()] = match.group(2).strip()
    return counts, aliases


def format_report(counts, aliases):
    """Return the report as a list of output lines.

    Rows are ordered by descending count; ties are broken by descending
    reference code.  The first entry is an empty string so that the table
    is preceded by a blank line.
    """
    rows = sorted(((n, code) for code, n in counts.items()), reverse=True)
    report = ["", "\tcount\t\treference code", "\t-----\t\t--------------"]
    for n, code in rows:
        report.append("\t%d\t%s\t%s" % (n, aliases.get(code, ''), code))
    return report


def main(argv=None):
    """Run the command-line tool and return its exit status.

    Parameters:
        argv -- argument vector including the program name; defaults to
                ``sys.argv``.

    Returns:
        0 on success, 1 if no file name was given, 2 if neither the given
        file nor the same name with ``.tex`` appended is readable.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) <= 1:
        print(USAGE)
        return 1

    # get filename, falling back to "<name>.tex" as LaTeX itself does
    filename = argv[1]
    if not os.access(filename, os.R_OK):
        if not os.access(filename + '.tex', os.R_OK):
            print("Unable to read %s !" % filename)
            return 2
        filename = filename + '.tex'

    print("counting reference codes for %s ..." % filename)

    # the context manager closes the file even if scanning raises
    with open(filename) as f:
        counts, aliases = scan_references(f)

    # display reference code counts
    print("\n".join(format_report(counts, aliases)))
    return 0


if __name__ == '__main__':
    sys.exit(main())