#!/usr/bin/perl
############################################################################
#
# bibtex-2-tex2rtf.pl
#
# Takes a free bibtex file and converts this to the slightly
# simplified form required for compatibility with tex2rtf.
#
# Changes include getting rid of LaTeX accent forms so that
# they are normal characters, dropping certain unrecognized
# extension fields, and removing any final commas (as are
# introduced by jabref for example).
#
# Usage: bibtex-2-tex2rtf.pl [INPUT [OUTPUT]]
#        INPUT  defaults to "in.bib"
#        OUTPUT defaults to "in-tex2rtf.bib"
#
# TO DO: this does not yet quite work, in that a few too many
#        commas are sometimes taken out and some too many
#        left in! Needs a careful debug of the replacements in
#        _tidy_commas().
#
# Author: John Bateman
#
# Created: February 2005
#
############################################################################

use strict;
use warnings;

# NOTE: there is deliberately no "use utf8" here.  The accented characters
# in the tables below are therefore written to the output as exactly the
# bytes they occupy in this source file.

# Fields that are dropped from every entry.  "topics" and "abstract" are
# handled more than once (see _drop_unwanted_fields); since each removal
# only deletes the first occurrence, the repetition is kept as it was.
my @UNWANTED_FIELDS = qw(
    place topics crossref abstract url anote pdfurl psurl project key
    location isbn date status slides keywords bibsource url_checked
    repourl topics
);

# Ordered LaTeX -> plain character rules applied before the caron rules.
# A plain string on the left is matched literally; a qr// object is used
# as a pattern.  The order is significant: longer, braced forms must be
# tried before the shorter forms they contain.
my @ACCENT_RULES = _compile_rules(
    [ q(\ldots ),      '... ' ],
    [ q({\"{a}}),      'ä' ],
    [ q({\"{o}}),      'ö' ],
    [ q({\"{u}}),      'ü' ],
    [ q({\ss}\ ),      'ß ' ],
    [ q(\ss}),         'ß}' ],
    [ q(\ss ),         'ß' ],
    [ q(\ss),          'ß' ],
    [ q({\"u}),        'ü' ],
    [ q({\"a}),        'ä' ],
    [ q({\"o}),        'ö' ],
    [ q({\'a}),        'á' ],
    [ q({\'e}),        'é' ],
    [ q({\'o}),        'ó' ],
    [ q({\`e}),        'è' ],
    [ q({\~n}),        'ñ' ],
    [ q(\~n),          'ñ' ],
    [ q({\"{\i}}),     'ï' ],
    [ q({\"e}),        'ë' ],
    [ q(\"a),          'ä' ],
    [ q(\"u),          'ü' ],
    [ q(\"o),          'ö' ],
    [ q(\'e),          'é' ],
    [ q(\`i),          'ì' ],
    [ q(\'i),          'í' ],
    [ q(\'a),          'á' ],
    [ q(\`e),          'è' ],
    [ q(\`a),          'à' ],
    [ q(\`o),          'ò' ],
    [ q(\'o),          'ó' ],
    [ q(\`u),          'ù' ],
    [ q(\´e),          'é' ],
    # NOTE(review): capital I is mapped to lowercase î, as in the
    # original rule -- confirm whether Î was intended.
    [ q(\^{I}),        'î' ],
    [ q(\'{e}),        'é' ],
    [ q(\'{a}),        'á' ],
    [ q(\'{o}),        'ó' ],
    [ q(\`{e}),        'è' ],
    [ q(\`{a}),        'à' ],
    [ q(\`{o}),        'ò' ],
    [ q(\"{o}),        'ö' ],
    [ q(\"{u}),        'ü' ],
    [ q(\"{a}),        'ä' ],
    [ q(\'{\i}),       'í' ],
    [ q({\"U}),        'Ü' ],
    [ q({\"A}),        'Ä' ],
    [ q({\"O}),        'Ö' ],
    [ q({\'A}),        'Á' ],
    [ q({\'E}),        'É' ],
    [ q({\'O}),        'Ó' ],
    [ q({\`E}),        'È' ],
    [ q({\'y}),        'ý' ],
    [ q(\'A),          'Á' ],
    [ q(\"A),          'Ä' ],
    [ q(\"U),          'Ü' ],
    [ q(\"O),          'Ö' ],
    [ q(\`I),          'Ì' ],
    [ q(\'I),          'Í' ],
    [ q(\"i),          'ï' ],
    [ q(\mbox{\o}),    'ø' ],
    [ qr/\\AA ?/,      'Å' ],
    [ qr/\\aa ?/,      'å' ],
    [ q({\c c}),       'ç' ],
    [ qr/\\c *?c/,     'ç' ],
    [ q(\c{c}),        'ç' ],
    [ q(\c{o}),        'o' ],
);

# Ordered rules applied after the caron rules: the "home" character in
# URLs (although some of these should disappear) and assorted odds and ends.
my @SYMBOL_RULES = _compile_rules(
    [ q($~$),            '~' ],
    [ q($\sim$),         '~' ],
    [ q($\overline{X}$), 'X-bar' ],
    [ q(\ ),             ' ' ],
    [ q($\beta$),        'ß' ],
    [ q(\&),             ' and ' ],
    [ q(\_),             '_' ],
);

# Turn a list of [ FROM, TO ] pairs into [ compiled pattern, TO ] pairs.
# FROM may be a literal string (quoted with \Q..\E so that braces, dollars
# and backslashes need no manual escaping) or an already compiled qr//.
sub _compile_rules {
    my @rules = @_;
    my @compiled;
    for my $rule (@rules) {
        my ( $from, $to ) = @{$rule};
        my $pattern = ref $from ? $from : qr/\Q$from\E/;
        push @compiled, [ $pattern, $to ];
    }
    return @compiled;
}

# Apply every rule of a compiled rule list, in order, to all occurrences
# in the given text and return the result.
sub _apply_rules {
    my ( $text, @rules ) = @_;
    for my $rule (@rules) {
        my ( $pattern, $replacement ) = @{$rule};
        $text =~ s/$pattern/$replacement/g;
    }
    return $text;
}

# Remove the fields tex2rtf does not accept.  Only the first occurrence of
# each field is removed.
sub _drop_unwanted_fields {
    my ($entry) = @_;

    # Abstracts can be more complicated: the braces should really be
    # counted properly.  For now try to cheat and stop at the first "},".
    $entry =~ s/ abstract\s*=\s*\{.*?\},\s*?\n/,/is;

    for my $field (@UNWANTED_FIELDS) {
        $entry =~ s/ $field\s*=\s*\{.*?\}//is;     # field = {...}
        $entry =~ s/\s$field\s*=\s*\".*?\"//is;    # field = "..."
    }

    # month may be braced, quoted or a bare macro; lc is a bare value.
    $entry =~ s/ month\s=\s.*?\},/,/;
    $entry =~ s/ month\s=\s.*?\",/,/;
    $entry =~ s/ month\s=\s.*?,/,/;
    $entry =~ s/ lc\s*= .*?,/,/s;

    return $entry;
}

# Replace LaTeX accent commands and a few symbols by plain characters.
sub _replace_latex_markup {
    my ($entry) = @_;

    $entry = _apply_rules( $entry, @ACCENT_RULES );

    # Carons can't be handled yet: just keep the bare letter.
    # The second pattern matches a literal "v"; writing it as "\v" would
    # mean "vertical whitespace" in Perl 5.10 and later and never match.
    $entry =~ s/\{\\v (.)\}/$1/g;
    $entry =~ s/\\v\{(.*?)\}/$1/g;

    $entry = _apply_rules( $entry, @SYMBOL_RULES );

    return $entry;
}

# Dump any extra commas left behind by the removals above.
# The statement order is kept exactly as originally written; see the
# TO DO in the file header -- this is known not to be quite right yet.
# NOTE(review): the ",\n \n" patterns contain a single space as found in
# the source; confirm this matches the indentation of real input files.
sub _tidy_commas {
    my ($entry) = @_;

    $entry =~ s/,\s*?,/,/g;
    $entry =~ s/\n\s*,\s*\n//;
    $entry =~ s/,\n \n//gs;
    $entry =~ s/,\s*,/,/g;
    $entry =~ s/,\s*?\}/}/g;
    $entry =~ s/,\s*?\}/\n}/gs;
    $entry =~ s/^\s*,\s*$//m;

    return $entry;
}

# Convert one complete bibtex entry (given as a single string) into the
# simplified form and return it.
sub convert_entry {
    my ($entry) = @_;

    # NOTE(review): as written this replaces the first space by a space,
    # i.e. it changes nothing; presumably a tab or multi-space pattern was
    # intended -- confirm against the author's copy.
    $entry =~ s/ / /;

    $entry = _drop_unwanted_fields($entry);
    $entry = _replace_latex_markup($entry);
    $entry = _tidy_commas($entry);

    return $entry;
}

# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

my $in_path  = @ARGV > 0 ? $ARGV[0] : 'in.bib';
my $out_path = @ARGV > 1 ? $ARGV[1] : 'in-tex2rtf.bib';

open my $in, '<', $in_path
    or die "Cannot open '$in_path' for reading: $!\n";
open my $out, '>', $out_path
    or die "Cannot open '$out_path' for writing: $!\n";

{
    # It is easier to process if we put every bibtex entry in a single
    # string: paragraph mode returns one blank-line-separated chunk per read.
    local $/ = "";

    while ( my $entry = <$in> ) {

        # Chunks that do not start with an @-entry are not copied.
        next unless $entry =~ /^\s*?\@/;

        if ( $entry =~ /\@comment/i || $entry =~ /\@STRING/i ) {
            print STDOUT "Ignoring and removing:\n";
            print STDOUT $entry;
            next;    # skip this
        }

        $entry = convert_entry($entry);

        # Report entries that still show a dangling comma after cleanup.
        if ( $entry =~ /,\n \n/s ) {
            print STDOUT "nasty: \n$entry";
        }

        print {$out} $entry;
    }
}

print STDOUT "DONE!";

close $in;
close $out
    or die "Cannot close '$out_path': $!\n";