#!/usr/bin/env python
# $Id$
#
# Copyright (c) 2007, Michael P. Fitzgerald (mpfitz@berkeley.edu)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The name of Michael P. Fitzgerald may not be used to endorse
#       or promote products derived from this software without specific
#       prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY MICHAEL P. FITZGERALD ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL MICHAEL P. FITZGERALD BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# Some of the conversion code ported from J. Baker's nat2jour.pl
# part of the astronat package.
#
# Commands for compressing figures ported from M. Perrin's
# submission-prep Perl script.
r"""Prepare a LaTeX manuscript (fileroot.tex + fileroot.bbl) for submission.

The manuscript and bibliography are rewritten into a submission directory as
ms.tex / ms.bbl, figures referenced by \plotone, \plottwo and
\includegraphics are copied and renamed f<N>[letter].eps, and (arXiv mode)
large figures are recompressed.

The code only uses single-argument ``print(...)`` calls so that it runs
unchanged on Python 2.6+ and Python 3.
"""

import getopt
import os
import re
import shutil
import string
import subprocess
import sys

default_maxauths = 8
default_res = 75  # [dpi]
default_thresh = 200  # [kilobytes]

# Figure environment delimiters.  The starred (two-column) variant and
# leading whitespace are accepted as well as a bare \begin{figure}.
_BEGIN_FIGURE_RE = re.compile(r"\s*\\begin\{figure\*?\}")
_END_FIGURE_RE = re.compile(r"\s*\\end\{figure\*?\}")

# Graphics commands, in the order in which their files are numbered.  The
# leading [^%]* keeps commented-out commands from matching.
_FIGURE_COMMAND_RES = (
    re.compile(r"[^%]*\\plotone\{(.*?)\}"),
    re.compile(r"[^%]*\\includegraphics(?:\[.*?\])?\{(.*?)\}"),
    re.compile(r"[^%]*\\plottwo\{(.*?)\}\{(.*?)\}"),
)
_ONLINEONLYCOLOR_RE = re.compile(r"[^%]*\\onlineonlycolor\{(.*?)\}")

_BIBITEM_RE = re.compile(
    r"\\bibitem\[\{(.*?)\((\d{4}[a-z]*)\)(.*?)\}\]\{(.*?)\}(.*)")


def check_ext(s, ext):
    """Get filename with default extension.

    Returns `s` if it is readable, otherwise `s + ext`.  A message is
    printed (but no exception raised) if the result is not readable either.
    """
    if not os.access(s, os.R_OK):
        s = s + ext
        if not os.access(s, os.R_OK):
            print("can't find %s ..." % s)
    return s


def _run_tool(cmd, stdout=None):
    "Run an external command (argument list); True only if it exited with 0."
    try:
        return subprocess.call(cmd, stdout=stdout) == 0
    except OSError:
        # executable missing / not runnable
        print("\tcould not run %s" % cmd[0])
        return False


def _remove_if_exists(fn):
    "Delete a file, ignoring the case where it was never created."
    if os.path.exists(fn):
        os.remove(fn)


def compress_figure(eps_fn, res=default_res, thresh=default_thresh):
    """Compress an eps figure for arXiv submission.

    eps_fn   path of the figure; it is replaced in place when a smaller
             version could be produced
    res      resolution in dpi
    thresh   threshold for compression in kb; smaller files are skipped

    The figure is rasterised with ``gs`` and wrapped back into EPS with
    ``jpeg2ps``.  If either tool is missing or fails, or the result is empty
    or not smaller, the original figure is left untouched.
    """
    thresh_bytes = thresh * 1024  # [kilobytes] -> [bytes]
    if os.path.getsize(eps_fn) <= thresh_bytes:
        print("\tskipping %s ..." % eps_fn)
        return

    print("\tprocessing %s ..." % eps_fn)
    root = os.path.splitext(eps_fn)[0]
    jpg_fn = root + '.jpg'
    new_eps_fn = root + '_p.eps'

    # create processed figure; argument lists (no shell) so that file names
    # containing spaces or shell metacharacters are handled correctly
    gs_cmd = ['gs', '-r%d' % res, '-dEPSCrop', '-dTextAlphaBits=4',
              '-sDEVICE=jpeg', '-sOutputFile=%s' % jpg_fn,
              '-dBATCH', '-dNOPAUSE', eps_fn]
    try:
        converted = _run_tool(gs_cmd)
        if converted:
            with open(new_eps_fn, 'wb') as new_eps_f:
                converted = _run_tool(['jpeg2ps', jpg_fn], stdout=new_eps_f)
    finally:
        _remove_if_exists(jpg_fn)

    # use new figure only if it is a real (non-empty) file that is smaller;
    # an empty file is what a failed conversion leaves behind
    if converted and 0 < os.path.getsize(new_eps_fn) < os.path.getsize(eps_fn):
        os.rename(new_eps_fn, eps_fn)
    else:
        _remove_if_exists(new_eps_fn)
        if not converted:
            print("\tcould not compress %s, keeping original" % eps_fn)


def _replace_figure_name(line, old, new):
    r"""Replace the command argument {old} with {new} in a line.

    Anchoring on the braces keeps one file name from being rewritten inside
    another that merely contains it (e.g. \plottwo{a}{ab}, or
    example / example_color).
    """
    return line.replace('{%s}' % old, '{%s}' % new)


def _truncate_author_list(ref):
    """Shorten the author list of a formatted reference to 'First {et~al.}'.

    Returns the reference unchanged (after printing a message) when the year
    or the expected commas cannot be found.
    """
    # extract author list, last comma before year (if any) and year
    m = re.match(r"(.*?)(\,)?\s*(\d{4})", ref)
    if not m:
        print("Can't find year in reference:\n%s" % ref)
        return ref
    auths, lastcomma = m.group(1, 2)
    lastcomma = lastcomma or ''  # set blank string if no match
    end = m.start(3)  # position of year

    # Original note on this test: avoid refs like "---, 1998, ...".
    # NOTE(review): as written, truncation is only attempted when the author
    # list does NOT start with a word character (e.g. it starts with '{');
    # confirm this is the intended sense of the test.
    if re.match(r"\w", auths):
        return ref

    n_commas = 2  # "Last, F.," -- i.e. keep the first author only
    if n_commas > auths.count(','):
        print("Error trying to truncate author list:\n%s" % ref)
        return ref

    # find the index just past the Nth comma
    pos = 0
    for _ in range(n_commas):
        pos = auths.index(',', pos) + 1
    # replace with etal; end-1 keeps the character preceding the year
    return ref[0:pos] + " {et~al.}" + lastcomma + ref[end - 1:]


class SubmissionPreparer(object):
    "Class for preparing a manuscript for submission"

    def get_submission_dir(self, fileroot):
        "convention for submission directory"
        return fileroot + '_submission/'

    def figname(self, num, i, n):
        """filename convention for processed figures

        num  figure number
        i    index of the panel within the figure
        n    number of panels; a letter suffix is used only when n != 1
        """
        letter = '' if n == 1 else string.ascii_lowercase[i]
        return "f%d%s.eps" % (num, letter)

    def _record_output(self, path):
        """Add an output file to the file list.

        Paths inside the submission directory are recorded relative to it,
        matching how figures, README and copied inputs are recorded.
        """
        subdir = getattr(self, 'subdir', '')
        if subdir and path.startswith(subdir):
            path = path[len(subdir):]
        self.filelist.append(path)

    def _update_figure_state(self, line):
        "track figure environments: bump the counter on begin, clear on end"
        if _BEGIN_FIGURE_RE.match(line):
            self.curfig += 1
            self.in_figure = True
        if _END_FIGURE_RE.match(line):
            self.in_figure = False

    def _find_figure_files(self, line):
        "list the file arguments of the graphics commands found in a line"
        found = []
        for pattern in _FIGURE_COMMAND_RES:
            m = pattern.match(line)
            if m:
                assert self.in_figure, \
                    "graphics command outside figure environment: %s" % line
                found.extend(m.groups())
        return found

    def parse_figures(self, line):
        """parse a graphics object line (uncommented)

        Renames the figure files referenced in the line to the submission
        convention, queues (source, new name) pairs in self.figlist, and
        returns the rewritten line.
        """
        # FIXME handle case when more than one command per line
        # FIXME handle case when online-only command on separate line
        self._update_figure_state(line)
        newfigs = self._find_figure_files(line)

        ooc_newfigs = []
        m = _ONLINEONLYCOLOR_RE.match(line)
        if m:
            assert self.in_figure, \
                "\\onlineonlycolor outside figure environment: %s" % line
            ooc_newfigs.append(m.group(1))

        for i, epsfile in enumerate(newfigs):
            newfile = self.figname(self.curfig, i, len(newfigs))
            # replace filename in line
            line = _replace_figure_name(line, epsfile, newfile)
            # add .eps if it's not there, and put figure in list
            self.figlist.append((check_ext(epsfile, '.eps'), newfile))

        # online-only color figures
        if ooc_newfigs:
            assert len(ooc_newfigs) == len(newfigs)
            for i, epsfile in enumerate(ooc_newfigs):
                newfile = self.figname(self.curfig, i, len(newfigs))
                newfile = newfile.replace('.eps', '_color.eps')
                line = _replace_figure_name(line, epsfile, newfile)
                self.figlist.append((check_ext(epsfile, '.eps'), newfile))

        return line

    def process_bbl(self, bbl_infn, bbl_outfn, maxauths=default_maxauths):
        """Process the bibliography file.

        bbl_infn   natbib-style .bbl file to read
        bbl_outfn  .bbl file to write
        maxauths   references with more authors than this are shortened
                   to the first author plus et al.

        Returns a list of bibliography keys in order of bib. entry.
        Entries are delimited by a \\bibitem line and the next blank line.
        """
        bibinfo = {}   # contains bibliography information for each key
        bibkeys = []   # ordered list of keys
        item = ''      # text of the bibitem being accumulated
        procbib = False  # currently processing bibitems

        with open(bbl_infn) as bbl_inf:
            self._record_output(bbl_outfn)
            for line in bbl_inf:
                # new bibitem
                if re.match(r"\\bibitem", line):
                    item = line
                    procbib = True
                    continue
                if not procbib:
                    continue

                # append line to current item string
                item += line
                if not re.match(r"\n", line):
                    continue

                # blank line: end of item -- process
                procbib = False
                item = item.replace('\n', '')
                # strip '\natexlab's
                item = re.sub(r"\{\\natexlab\{(.*?)\}\}", r"\1", item)
                # strip \noopsort{}'s
                item = re.sub(r"\{\\noopsort\{(.*?)\}\}", '', item)

                # parse entry
                m = _BIBITEM_RE.match(item)
                if not m:
                    print("weird bibitem: %s" % item)
                    continue
                shortlist, year, longlist, key, ref = m.groups()

                # shorten author list in reference if too long
                authlist = longlist or shortlist
                if authlist.count(',') + 1 > maxauths:
                    ref = _truncate_author_list(ref)

                bibkeys.append(key)
                bibinfo[key] = shortlist, year, longlist, ref

        # output info to bbl file
        with open(bbl_outfn, 'w') as bbl_outf:
            bbl_outf.write("\\begin{thebibliography}{%d}\n\n" % len(bibkeys))
            for key in bibkeys:
                shortlist, year, longlist, ref = bibinfo[key]
                bbl_outf.write("\\bibitem[{%s(%s)%s}]{%s}\n%s\n\n"
                               % (shortlist, year, longlist, key, ref))
            bbl_outf.write("\\end{thebibliography}\n")

        return bibkeys

    def parse_extra(self, line):
        "any extra processing (hook for subclasses; identity here)"
        return line

    def _convert_line(self, line, ms_outf, do_input):
        """Apply the per-line manuscript conversions.

        Returns the converted line, or None if the line is to be dropped.
        Inlined \\input files are written straight to ms_outf.
        """
        # handle any extra processing
        line = self.parse_extra(line)

        # handle graphics objects
        line = self.parse_figures(line)

        # include files
        m = re.search(r"\\input\{(.*)\}", line)
        if m:
            infile = check_ext(m.group(1), '.tex')
            if do_input:
                # input file
                # NOTE assumes one input command per line
                with open(infile) as inf:
                    ms_outf.writelines(inf)
                line = re.sub(r"\\input\{.*\}", '', line)
            else:
                # copy file
                self.filelist.append(infile)
                print("%s -> %s" % (infile, self.subdir + infile))
                shutil.copyfile(infile, self.subdir + infile)

        # if this is the first citation for a ref w/ 3 authors, use the long format
        # NOTE no longer required by ApJ (?)
        # NOTE disabled code kept for reference; it relies on in_document,
        #      first_citation and n_auths, which are not maintained here.
        ## if in_document:
        ##     # iter over all citations in this line
        ##     for m in re.finditer(r"\\([Cc]ite.*?)(\[.*\])?\{(.*?)\}", line):
        ##         citecomm, opt, keystr = m.groups()
        ##         opt = opt or '' # set to blank string if no match
        ##         comm = "\\%s%s{%s}" % (citecomm, opt, keystr)
        ##         # iter over all keys in this citation
        ##         keys = re.split(r"\,\s*", keystr)
        ##         for key in keys:
        ##             if first_citation[key]:
        ##                 first_citation[key] = False
        ##                 if n_auths[key] == 3:
        ##                     if len(keys) > 1:
        ##                         print "warning:  won't use long format for 1st occ. in mult. cit.: %s" % comm
        ##                     else:
        ##                         # make sure long-format
        ##                         if not citecomm.endswith('*'):
        ##                             # replace first occurrence with long-format version
        ##                             newcomm = "\\%s*%s{%s}" % (citecomm, opt, keystr)
        ##                             line = line.replace(comm, newcomm, 1)

        # remove bibliography style commands
        if re.match(r"\\bibliographystyle", line):
            return None

        # replace bibliography entry
        line = re.sub(r"\\bibliography\{.*\}", r"\\bibliography{}", line)

        # comment out natbib package
        # NOTE must be alone on a line
        line = re.sub(r"\\usepackage\{natbib\}", r"%\\usepackage{natbib}", line)
        return line

    def process_manuscript(self, ms_infn, ms_outfn, bibkeys=None,
                           strip=True, do_input=True):
        r"""Processes the manuscript

        ms_infn    manuscript to read
        ms_outfn   manuscript to write
        bibkeys    bibliography keys (currently unused; kept for callers)
        strip      strip blank lines/comments
        do_input   place \input'ted files into the output instead of
                   copying them to the submission directory

        NOTE can't handle things when commands are broken across lines
        """
        filebegin = True  # True if we haven't encountered any non-whitespace/comments yet

        with open(ms_infn) as ms_inf:
            self._record_output(ms_outfn)
            with open(ms_outfn, 'w') as ms_outf:
                for line in ms_inf:
                    if strip:
                        # blank lines are kept only after the first real line
                        if line == '\n':
                            if not filebegin:
                                ms_outf.write(line)
                            continue
                        # strip whole-line comments, then trailing comments
                        # (a '%' preceded by a backslash is not a comment)
                        line = re.sub(r"^%.*\n?", '\n', line)
                        line = re.sub(r"^(.*[^\\])%.*\n?", r"\1\n", line)
                        if line == '\n':
                            continue
                        filebegin = False  # now we're processing real commands

                    line = self._convert_line(line, ms_outf, do_input)
                    if line is not None:
                        ms_outf.write(line)

    def setup(self, fileroot):
        "set up internal variables and create the submission directory"
        self.filelist = []  # holds list of files
        self.curfig = 0  # index for current figure number
        self.figlist = []  # holds list of figures
        self.in_figure = False  # currently in a figure environment

        # make submission directory
        self.subdir = self.get_submission_dir(fileroot)
        if not os.access(self.subdir, os.W_OK):
            os.mkdir(self.subdir)

    def do_prep(self, fileroot, strip=True, maxauths=default_maxauths,
                do_input=True, do_figcheck=False, **kwargs):
        r"""Prepares given tex file for submission.

        fileroot    (string) root of manuscript filename (fileroot.tex)
        strip       (bool) option to strip blank lines/comments
        maxauths    (int) option for maximum number of authors to list
        do_input    (bool) option to place \input'ted files into master
                    document, instead of copy
                    NOTE: does not handle recursion
        do_figcheck (bool) create fileroot_figcheck.tex, useful for
                    examining figures

        Extra keyword arguments are accepted and ignored so that subclasses
        can share one option dictionary.  setup() must have been called.
        """
        # file/path names
        bbl_infn = fileroot + '.bbl'
        bbl_outfn = 'ms.bbl'
        ms_infn = fileroot + '.tex'
        ms_outfn = 'ms.tex'

        # sanity checks
        assert os.access(ms_infn, os.R_OK), "can't read %s" % ms_infn
        assert os.access(bbl_infn, os.R_OK), "can't read %s" % bbl_infn

        # convert bibliography file (.bbl)
        bibkeys = self.process_bbl(bbl_infn, self.subdir + bbl_outfn,
                                   maxauths=maxauths)

        # convert manuscript
        self.process_manuscript(ms_infn, self.subdir + ms_outfn,
                                bibkeys=bibkeys, strip=strip,
                                do_input=do_input)

        # copy figures
        for epsfile, newfile in self.figlist:
            print("%s -> %s" % (epsfile, newfile))
            self.filelist.append(newfile)
            shutil.copyfile(epsfile, self.subdir + newfile)

        # make figure-check file
        if do_figcheck:
            figcheck_fn = fileroot + '_figcheck.tex'
            with open(figcheck_fn, 'w') as f:
                f.write("\\documentclass[preprint2]{aastex}\n"
                        "\\begin{document}\n")
                for epsfile, newfile in self.figlist:
                    f.write("\\includegraphics[width=.4\\textwidth]{%s}\n\n"
                            % epsfile)
                f.write("\\end{document}\n")

    def prep(self, fileroot, **kwargs):
        "wrapper for preparation: setup() followed by do_prep()"
        self.setup(fileroot)
        self.do_prep(fileroot, **kwargs)


_ApJSubmissionPreparerBase = SubmissionPreparer


class ApJSubmissionPreparer(_ApJSubmissionPreparerBase):
    "Class for preparing a manuscript for submission to ApJ"

    def get_submission_dir(self, fileroot):
        "convention for submission directory"
        return fileroot + '_apj/'

    def do_prep(self, fileroot, **kwargs):
        r"""Prepares given tex file for ApJ submission.

        fileroot    (string) root of manuscript filename (fileroot.tex)
        strip       (bool) option to strip blank lines/comments
        maxauths    (int) option for maximum number of authors to list
        do_input    (bool) option to place \input'ted files into master
                    document, instead of copy
                    NOTE: does not handle recursion
        do_figcheck (bool) create fileroot_figcheck.tex, useful for
                    examining figures

        In addition to the base processing, fileroot.README is copied to
        README (required), ref_rept_response.txt to response (if present),
        and the list of included files is appended to the README.
        """
        # file/path names
        readme_infn = fileroot + '.README'
        readme_outfn = 'README'
        response_infn = 'ref_rept_response.txt'
        response_outfn = 'response'

        # sanity checks
        assert os.access(readme_infn, os.R_OK), "can't read %s" % readme_infn

        # copy README
        print("%s -> %s" % (readme_infn, readme_outfn))
        self.filelist.append(readme_outfn)
        shutil.copyfile(readme_infn, self.subdir + readme_outfn)

        # copy referee rept. response (if any)
        if os.access(response_infn, os.R_OK):
            print("%s -> %s" % (response_infn, response_outfn))
            self.filelist.append(response_outfn)
            shutil.copyfile(response_infn, self.subdir + response_outfn)

        # process
        _ApJSubmissionPreparerBase.do_prep(self, fileroot, **kwargs)

        # append file list to README
        with open(self.subdir + readme_outfn, 'a') as f:
            f.write('Included files:\n')
            for fn in self.filelist:
                if fn.count('color') == 1:
                    fn += '\t (online-only color version)'
                f.write("\t%s\n" % fn)
            f.write('\n')


_ArXivSubmissionPreparerBase = SubmissionPreparer


class ArXivSubmissionPreparer(_ArXivSubmissionPreparerBase):
    "Class for preparing a manuscript for submission to arXiv"

    def get_submission_dir(self, fileroot):
        "convention for submission directory"
        return fileroot + '_arxiv/'

    def parse_figures(self, line):
        r"""parse a graphics object line (uncommented)

        Like the base class, except that a figure tagged with
        \onlineonlycolor is taken from the color file, and the
        \onlineonlycolor command itself is removed from the line.
        """
        # FIXME handle case when more than one command per line
        # FIXME handle case when online-only command on separate line
        #       (newfigs is then empty and the assignment below raises
        #       IndexError)
        self._update_figure_state(line)

        # (name used in the manuscript, file actually copied)
        newfigs = [(fn, fn) for fn in self._find_figure_files(line)]

        m = _ONLINEONLYCOLOR_RE.match(line)
        if m:
            assert self.in_figure, \
                "\\onlineonlycolor outside figure environment: %s" % line
            # replace previous figure with online-only color version
            newfigs[-1] = newfigs[-1][0], m.group(1)
            # strip command
            line = re.sub(r"\\onlineonlycolor\{(.*?)\}", '', line)

        for i, (bw_epsfile, col_epsfile) in enumerate(newfigs):
            newfile = self.figname(self.curfig, i, len(newfigs))
            # replace filename in line
            line = _replace_figure_name(line, bw_epsfile, newfile)
            # add .eps if it's not there, and put figure in list
            self.figlist.append((check_ext(col_epsfile, '.eps'), newfile))

        return line

    def parse_extra(self, line):
        "extra processing to remove onlineonlycolor command definition"
        return re.sub(r"\\newcommand\{\\onlineonlycolor\}\[1\]\{.*\}", '',
                      line)

    def do_prep(self, fileroot, res=default_res, thresh=default_thresh,
                **kwargs):
        r"""Prepares given tex file for arXiv submission.

        fileroot    (string) root of manuscript filename (fileroot.tex)
        strip       (bool) option to strip blank lines/comments
        maxauths    (int) option for maximum number of authors to list
        do_input    (bool) option to place \input'ted files into master
                    document, instead of copy
                    NOTE: does not handle recursion
        do_figcheck (bool) create fileroot_figcheck.tex, useful for
                    examining figures
        res         (int) resolution for compressing figures [dpi]
        thresh      (int) threshold for compressing figures [kb]
        """
        # process
        _ArXivSubmissionPreparerBase.do_prep(self, fileroot, **kwargs)

        # process figures for size requirements
        for _, eps_fn in self.figlist:
            compress_figure(self.subdir + eps_fn, res=res, thresh=thresh)


_USAGE = """prep_jour.py $Rev$
Copyright (c) 2007, Michael P. Fitzgerald (fitz@astro.berkeley.edu)
All rights reserved.

Prepares a given tex file for submission to a journal service.

Usage: prep_jour.py [-s] [-a maxauths] [-i] [-f] [-m mode] [-r res] [-t thresh] fileroot

Required arguments:
  fileroot      root of manuscript filename (fileroot.tex)

Options:
  -s            disable stripping of blank lines/comments
  -a maxauths   maximum number of authors to list in bibliography (default %d)
  -i            disable placement of \\input'ted files into master document
  -f            create fileroot_figcheck.tex, useful for examining figures
  -m mode       preparation mode, i.e. apj (default), arxiv
  -r res        resolution for compressed figures (arxiv mode, default %d dpi)
  -t thresh     threshold for compressing figures (arxiv mode, default %d kb)

Description
-----------
This program creates a directory called fileroot_apj or fileroot_arxiv
(depending on the mode) which contains the processed manuscript and
supporting files.  It expects your manuscript to follow a certain layout:

  fileroot.tex      Manuscript
  fileroot.bbl      Bibliography

for ApJ:
  fileroot.README         README file (e.g. contact information)
  ref_rept_response.txt   (optional) response to referee


Online-only Color Figures
-------------------------
To specify online-only color figures for ApJ, this program expects the
following:

In your manuscript, create an \\onlineonlycolor command in the preamble, e.g.
  \\newcommand{\\onlineonlycolor}[1]{\\notetoeditor{Online-only color figure: #1}}

Then, for every grayscale figure that has an online-only color
counterpart, use the grayscale version in your manuscript and tag the
color version with the \\onlineonlycolor command, e.g.
  \\plotone{example.eps}\\onlineonlycolor{example_color.eps}
""" % (default_maxauths, default_res, default_thresh)

# preparation modes selectable with -m
_PREPARERS = {
    'apj': ApJSubmissionPreparer,
    'arxiv': ArXivSubmissionPreparer,
}


def main(argv=None):
    """Command-line entry point.

    argv  list of arguments (defaults to sys.argv[1:])

    Exits with status 2 for bad options, 1 when fileroot is missing and
    3 for an unknown mode.
    """
    if argv is None:
        argv = sys.argv[1:]

    # get command line args/options
    try:
        opts, args = getopt.getopt(argv, 'sa:ifm:r:t:')
    except getopt.GetoptError:
        print(_USAGE)
        sys.exit(2)

    # default options
    strip = True
    maxauths = default_maxauths
    do_input = True
    do_figcheck = False
    mode = 'apj'
    res = default_res
    thresh = default_thresh

    # process options
    try:
        for o, a in opts:
            if o == '-s':
                strip = False
            elif o == '-a':
                maxauths = int(a)
            elif o == '-i':
                do_input = False
            elif o == '-f':
                do_figcheck = True
            elif o == '-m':
                mode = a
            elif o == '-r':
                res = int(a)
            elif o == '-t':
                thresh = int(a)
    except ValueError:
        # non-integer value given to -a/-r/-t
        print(_USAGE)
        sys.exit(2)

    # process arguments
    if not args:
        print(_USAGE)
        sys.exit(1)
    fileroot = args.pop(0)

    # do preparation
    if mode not in _PREPARERS:
        print("invalid mode!  must be one of %s" % repr(sorted(_PREPARERS)))
        sys.exit(3)
    preparer = _PREPARERS[mode]()
    preparer.prep(fileroot,
                  strip=strip,
                  maxauths=maxauths,
                  do_input=do_input,
                  do_figcheck=do_figcheck,
                  res=res,
                  thresh=thresh)


if __name__ == '__main__':
    main()