#!/usr/bin/env python
# $Id: refcounter.py 3631 2007-06-03 05:08:45Z fitz $
#
# Copyright (c) 2007, Michael P. Fitzgerald (mpfitz@berkeley.edu)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The name of Michael P. Fitzgerald may not be used to endorse
#       or promote products derived from this software without specific
#       prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY MICHAEL P. FITZGERALD ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL MICHAEL P. FITZGERALD BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# This script is used to count the references in a given LaTeX file.
#

import collections
import os
import re
import sys

USAGE = """refcounter.py    $Rev: 3631 $
Copyright (c) 2007, Michael P. Fitzgerald (fitz@astro.berkeley.edu)
All rights reserved.

    Counts the number of citations for each reference code appearing in
    a LaTeX file.

Usage:  refcounter.py <file>

Required arguments:
    file        filename of LaTeX manuscript

"""

# A LaTeX comment starts at the first '%' preceded by an even number of
# backslashes (zero included): '\%' is a literal percent sign, whereas
# '\\%' is a line break followed by a comment.  Group 1 keeps the
# backslash pairs that belong to the text in front of the comment.
_R_COMMENT = re.compile(r"(?<!\\)((?:\\\\)*)%.*")

# Citation commands: \cite, \citet, \citep, \citetalias, \citepalias,
# optionally starred and optionally followed by bracketed arguments,
# e.g. \citep[see][p.~3]{key1,key2}.  Group 1 is the brace contents.
_R_CITE = re.compile(
    r"\\cite(?:|t|p|talias|palias)\*?(?:\[[^\]]*\])*\{([^}]*)\}")

# Citation alias definitions: \defcitealias{key}{alias text}.
_R_ALIAS = re.compile(r"\\defcitealias\{([^}]*)\}\{([^}]*)\}")


def strip_comment(line):
    """Return `line` with any trailing LaTeX comment removed.

    Everything from the first unescaped '%' up to (but not including) the
    line terminator is dropped; escaped percent signs ('\\%') are kept.
    """
    return _R_COMMENT.sub(r"\1", line)


def collect_citations(lines):
    """Scan LaTeX source lines for citations and citation aliases.

    Arguments:
        lines   iterable of strings, one per line of the manuscript

    Returns a tuple (refcodes, aliases):
        refcodes    list of every cited reference code in order of
                    appearance, with one entry per citation
        aliases     dict mapping reference code -> alias text taken from
                    \\defcitealias definitions (a later definition for the
                    same code replaces an earlier one)

    Each line is matched on its own, so a citation command that is split
    across several lines is not recognized.
    """
    refcodes = []
    aliases = {}
    for line in lines:
        line = strip_comment(line)

        for m in _R_CITE.finditer(line):
            # a single command may cite several comma-separated codes;
            # tolerate blanks around them and skip empty entries
            for ref in m.group(1).split(','):
                ref = ref.strip()
                if ref:
                    refcodes.append(ref)

        for m in _R_ALIAS.finditer(line):
            aliases[m.group(1).strip()] = m.group(2).strip()

    return refcodes, aliases


def count_refcodes(refcodes):
    """Return a list of (count, refcode) tuples for the given codes.

    The list is sorted by descending count; codes with the same count
    are in descending lexicographic order.
    """
    tally = collections.Counter(refcodes)
    return sorted(((n, ref) for ref, n in tally.items()), reverse=True)


def main(argv=None):
    """Run the reference counter and return the process exit status.

    Arguments:
        argv    argument vector including the program name; defaults to
                sys.argv

    Returns 0 on success, 1 if no filename was given (the usage text is
    printed), or 2 if neither the file nor file + '.tex' is readable.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) <= 1:
        print(USAGE)
        return 1

    # get filename, falling back on the name with a '.tex' extension
    filename = argv[1]
    if not os.access(filename, os.R_OK):
        if not os.access(filename + '.tex', os.R_OK):
            print("Unable to read %s !" % filename)
            return 2
        filename = filename + '.tex'
    print("counting reference codes for %s ..." % filename)

    # get all reference codes and alias definitions in the file
    with open(filename) as f:
        refcodes, aliases = collect_citations(f)

    # display reference code counts; the middle column holds the alias
    # (if one was defined), which is why the header leaves it blank
    print("")
    print("\tcount\t\treference code")
    print("\t-----\t\t--------------")
    for count, ref in count_refcodes(refcodes):
        print("\t%d\t%s\t%s" % (count, aliases.get(ref, ''), ref))

    return 0


if __name__ == '__main__':
    sys.exit(main())
    
