############################################################################
#       
#        bibtex-2-tex2rtf.pl
# 
#        Takes a free bibtex file and converts this to the slightly
#        simplified form required for compatibility with tex2rtf.
#
#        Changes include getting rid of LaTeX accent forms so that
#        they are normal characters, dropping certain unrecognized
#        extension fields, and removing any final commas (as are
#        introduced by jabref for example).
#
#   TO DO: this does not yet quite work, in that sometimes a few too
#          many commas are taken out and sometimes too many are
#          left in! Needs a careful debug of the final 10 lines
#          of replacements.
#
#        Author: John Bateman
#
#        Created: February 2005
#
############################################################################

#!/usr/bin/perl -w

# Enable strictures and warnings explicitly rather than relying on
# command-line switches.
use strict;
use warnings;

# convert_entry($entry)
#
# Rewrites one BibTeX entry (a single string, newlines included) into the
# simplified form wanted by tex2rtf and returns the rewritten string.
#
#   * removes fields tex2rtf does not accept (abstract, url, keywords, ...),
#   * replaces LaTeX accent commands by the corresponding plain characters,
#   * tidies up the commas left behind by the removed fields.
#
# The argument is not modified; the caller's string is copied first.
# All literal braces in the patterns are backslash-escaped: unescaped left
# braces are deprecated in Perl regexes and rejected outright in some
# positions by current perls.
sub convert_entry {
    my ($entry) = @_;

    # Alias $_ to the private copy so the long substitution list stays terse.
    for ($entry) {

        # Collapse a long run of spaces into a single space.  Only the first
        # such run is touched (no /g), exactly as before.
        # NOTE(review): possibly /g was intended -- confirm.
        s/                  / /;

        # clear the following fields as not being allowed...

        # abstracts can be more complicated...
        # should count the opening and closing braces properly
        # For now try to cheat: drop everything up to the first "},".
        s/ abstract\s*=\s*\{.*?\},\s*?\n/,/is;

        # Each pass removes only the FIRST occurrence of a field, in its
        # brace-delimited and its quote-delimited form.  "topics" is listed
        # twice on purpose-or-accident in the original list; it is kept so
        # that a second occurrence is still removed.
        for my $field (
            qw(place topics crossref abstract
               url anote pdfurl psurl project key
               location isbn date status slides
               keywords bibsource url_checked repourl topics)
          )
        {
            # \Q...\E keeps the field name literal inside the pattern.
            s/ \Q$field\E\s*=\s*\{.*?\}//is;
            s/\s\Q$field\E\s*=\s*\".*?\"//is;
        }
        s/ month\s=\s.*?\},/,/;
        s/ month\s=\s.*?\",/,/;
        s/ month\s=\s.*?,/,/;
        s/ lc\s*= .*?,/,/s;

        #
        # clear up accents, etc. for the final icing on the cake...
        # (order matters: the braced forms are handled before the bare ones)
        #
        s/\\ldots /... /g;

        s/\{\\\"\{a\}\}/ä/g;
        s/\{\\\"\{o\}\}/ö/g;
        s/\{\\\"\{u\}\}/ü/g;

        s/\{\\ss\}\\ /ß /g;
        s/\\ss\}/ß}/g;
        s/\\ss /ß/g;
        s/\\ss/ß/g;

        s/\{\\\"u\}/ü/g;
        s/\{\\\"a\}/ä/g;
        s/\{\\\"o\}/ö/g;
        s/\{\\\'a\}/á/g;
        s/\{\\\'e\}/é/g;
        s/\{\\\'o\}/ó/g;
        s/\{\\\`e\}/è/g;
        s/\{\\~n\}/ñ/g;
        s/\\~n/ñ/g;
        s/\{\\\"\{\\i\}\}/ï/g;
        s/\{\\\"e\}/ë/g;

        s/\\\"a/ä/g;
        s/\\\"u/ü/g;
        s/\\\"o/ö/g;
        s/\\\'e/é/g;
        s/\\\`i/ì/g;
        s/\\\'i/í/g;
        s/\\\'a/á/g;
        s/\\\`e/è/g;
        s/\\\`a/à/g;
        s/\\\`o/ò/g;
        s/\\\'o/ó/g;
        s/\\\`u/ù/g;
        s/\\\´e/é/g;
        # \^{I} is a capital I with circumflex, so emit the capital letter.
        s/\\\^\{I\}/Î/g;

        s/\\\'\{e\}/é/g;
        s/\\\'\{a\}/á/g;
        s/\\\'\{o\}/ó/g;
        s/\\\`\{e\}/è/g;
        s/\\\`\{a\}/à/g;
        s/\\\`\{o\}/ò/g;
        s/\\\"\{o\}/ö/g;
        s/\\\"\{u\}/ü/g;
        s/\\\"\{a\}/ä/g;

        s/\\\'\{\\i\}/í/g;

        s/\{\\\"U\}/Ü/g;
        s/\{\\\"A\}/Ä/g;
        s/\{\\\"O\}/Ö/g;
        s/\{\\\'A\}/Á/g;
        s/\{\\\'E\}/É/g;
        s/\{\\\'O\}/Ó/g;
        s/\{\\\`E\}/È/g;

        s/\{\\'y\}/ý/g;

        s/\\\'A/Á/g;
        s/\\\"A/Ä/g;
        s/\\\"U/Ü/g;
        s/\\\"O/Ö/g;
        s/\\\`I/Ì/g;
        s/\\\'I/Í/g;

        s/\\\"i/ï/g;
        s/\\mbox\{\\o\}/ø/g;

        s/\\AA ?/Å/g;
        s/\\aa ?/å/g;
        s/\{\\c c\}/ç/g;
        s/\\c *?c/ç/g;
        s/\\c\{c\}/ç/g;
        s/\\c\{o\}/o/g;

        # can't handle these yet: carons are simply dropped, keeping the
        # base letter(s).
        s/\{\\v (.)\}/$1/g;
        # Match a literal backslash followed by the letter v.  (Writing
        # "\\\v" here would mean backslash + the vertical-whitespace class
        # \v and would never match the LaTeX command.)
        s/\\v\{(.*?)\}/$1/g;

        # do something about the home character in URLs...
        # although some of these should disappear...
        s/\$~\$/~/g;
        s/\$\\sim\$/~/g;

        # odds and ends...
        s/\$\\overline\{X\}\$/X-bar/g;
        s/\\ / /g;
        s/\$\\beta\$/ß/g;
        s/\\&/ and /g;
        s/\\_/_/g;

        # dump any extra commas...
        # This sequence is kept exactly as it was (see the TO DO in the file
        # header): it is known to be imperfect and needs its own debugging.
        s/,\s*?,/,/g;
        s/\n\s*,\s*\n//;
        s/,\n \n//gs;
        s/,\s*,/,/g;
        s/,\s*?\}/}/g;
        s/,\s*?\}/\n}/gs;

        s/^\s*,\s*$//m;
    }

    return $entry;
}

# main()
#
# Reads "in.bib" paragraph by paragraph, converts every BibTeX entry with
# convert_entry() and writes the result to "in-tex2rtf.bib".  @comment and
# @string entries are reported on STDOUT and dropped; paragraphs that do not
# start with "@" are dropped silently.  Dies if either file cannot be opened
# or the output file cannot be closed cleanly.
sub main {
    my $in_path  = 'in.bib';
    my $out_path = 'in-tex2rtf.bib';

    # Open the input first so that a missing input does not leave behind a
    # freshly truncated, empty output file.
    open my $in, '<', $in_path
      or die "Cannot open $in_path for reading: $!\n";
    open my $out, '>', $out_path
      or die "Cannot open $out_path for writing: $!\n";

    # Paragraph mode: each read returns one blank-line-separated chunk, so
    # a whole BibTeX entry arrives as a single string.
    local $/ = '';

    while (my $entry = <$in>) {
        next unless $entry =~ /^\s*?\@/;

        if ($entry =~ /\@comment/i || $entry =~ /\@STRING/i) {
            print STDOUT "Ignoring and removing:\n";
            print $entry;
            next;
        }

        my $converted = convert_entry($entry);

        # Flag entries that still contain a dangling comma before a
        # near-empty line, so they can be checked by hand.
        if ($converted =~ /,\n \n/s) {
            print STDOUT "nasty: \n$converted";
        }

        print {$out} $converted;
    }

    close $in;
    # Buffered write errors only surface at close time.
    close $out or die "Cannot close $out_path: $!\n";

    print STDOUT "DONE!";
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without touching any files.
main() unless caller;