diff options
Diffstat (limited to 'reproduce')
| -rw-r--r-- | reproduce/analysis/config/INPUTS.conf | 8 | ||||
| -rw-r--r-- | reproduce/analysis/config/delete-me-squared-num.conf | 9 | ||||
| -rw-r--r-- | reproduce/analysis/config/demo-year.conf | 3 | ||||
| -rw-r--r-- | reproduce/analysis/config/metadata.conf | 10 | ||||
| -rw-r--r-- | reproduce/analysis/make/delete-me.mk | 169 | ||||
| -rw-r--r-- | reproduce/analysis/make/demo-plot.mk | 88 | ||||
| -rw-r--r-- | reproduce/analysis/make/download.mk | 17 | ||||
| -rw-r--r-- | reproduce/analysis/make/format.mk | 86 | ||||
| -rw-r--r-- | reproduce/analysis/make/initialize.mk | 19 | ||||
| -rw-r--r-- | reproduce/analysis/make/paper.mk | 45 | ||||
| -rw-r--r-- | reproduce/analysis/make/top-make.mk | 5 | ||||
| -rw-r--r-- | reproduce/analysis/make/verify.mk | 11 | ||||
| -rw-r--r-- | reproduce/software/config/TARGETS.conf | 7 | ||||
| -rw-r--r-- | reproduce/software/config/texlive-packages.conf | 9 | ||||
| -rwxr-xr-x | reproduce/software/shell/configure.sh | 28 | 
15 files changed, 281 insertions, 233 deletions
| diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf index 6ddaec7..b1cf546 100644 --- a/reproduce/analysis/config/INPUTS.conf +++ b/reproduce/analysis/config/INPUTS.conf @@ -9,7 +9,7 @@  # this notice are preserved.  This file is offered as-is, without any  # warranty. -WFPC2IMAGE = WFPC2ASSNu5780205bx.fits -WFPC2MD5   = a4791e42cd1045892f9c41f11b50bad8 -WFPC2SIZE  = 62kb -WFPC2URL   = https://fits.gsfc.nasa.gov/samples +MK20DATA = menke20.xlsx +MK20MD5  = 8e4eee64791f351fec58680126d558a0 +MK20SIZE = 1.9MB +MK20URL  = https://www.biorxiv.org/content/biorxiv/early/2020/01/18/2020.01.15.908111/DC1/embed/media-1.xlsx diff --git a/reproduce/analysis/config/delete-me-squared-num.conf b/reproduce/analysis/config/delete-me-squared-num.conf deleted file mode 100644 index c86f841..0000000 --- a/reproduce/analysis/config/delete-me-squared-num.conf +++ /dev/null @@ -1,9 +0,0 @@ -# Number of samples in the demonstration analysis (to be deleted). -# -# Copyright (C) 2019-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice and -# this notice are preserved.  This file is offered as-is, without any -# warranty. -delete-me-squared-num = 50 diff --git a/reproduce/analysis/config/demo-year.conf b/reproduce/analysis/config/demo-year.conf new file mode 100644 index 0000000..429b220 --- /dev/null +++ b/reproduce/analysis/config/demo-year.conf @@ -0,0 +1,3 @@ +# This is the demonstration year showing the number of papers studied +# before 1997. +menke-demo-year = 1996 diff --git a/reproduce/analysis/config/metadata.conf b/reproduce/analysis/config/metadata.conf index 533d927..cddc33f 100644 --- a/reproduce/analysis/config/metadata.conf +++ b/reproduce/analysis/config/metadata.conf @@ -10,14 +10,14 @@  # warranty.  # Project information -metadata-title = The project title goes here +metadata-title = Towards Long-term and Archivable Reproducibility  # DOIs and identifiers. -metadata-arxiv = -metadata-doi-zenodo = +metadata-arxiv = 2006.03018 +metadata-doi-zenodo = https://doi.org/10.5281/zenodo.3872248  metadata-doi-journal = -metadata-doi = $(metadata-doi-journal) -metadata-git-repository = http://git.maneage.org/project.git +metadata-doi = $(metadata-doi-zenodo) +metadata-git-repository = https://gitlab.com/makhlaghi/maneage-paper  # DATA Copyright owner and license information.  metadata-copyright-owner = Mohammad Akhlaghi <mohammad@akhlaghi.org> diff --git a/reproduce/analysis/make/delete-me.mk b/reproduce/analysis/make/delete-me.mk deleted file mode 100644 index f45f9ea..0000000 --- a/reproduce/analysis/make/delete-me.mk +++ /dev/null @@ -1,169 +0,0 @@ -# Dummy Makefile to create a random dataset for plotting. -# -# Copyright (C) 2018-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> -# -# This Makefile is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This Makefile is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this Makefile.  If not, see <http://www.gnu.org/licenses/>. - - - - - -# Dummy dataset -# ------------- -# -# Just as a demonstration(!): we will use AWK to generate a table showing X -# and X^2 and draw its plot. -# -# Note that this dataset is directly read by LaTeX to generate a plot, so -# we need to put it in the $(tex-publish-dir) directory. -dm-squared = $(tex-publish-dir)/squared.txt -$(dm-squared): $(pconfdir)/delete-me-squared-num.conf | $(tex-publish-dir) - -        # When the plotted values are re-made, it is necessary to also -        # delete the TiKZ externalized files so the plot is also re-made by -        # PGFPlots. -	rm -f $(tikzdir)/delete-me-squared.pdf - -        # Write the column metadata in a temporary file name (appending -        # '.tmp' to the actual target name). Once all steps are done, it is -        # renamed to the final target. We do this because if there is an -        # error in the middle, Make will not consider the job to be -        # complete and will stop here. -	echo "# Data for demonstration plot of default Maneage (MANaging data linEAGE)." > $@.tmp -	echo "# It is a simple plot, showing the power of two: y=x^2! " >> $@.tmp -	echo "# " >> $@.tmp -	echo "# Column 1: X       [arbitrary, f32] The horizontal axis numbers." \ -	     >> $@.tmp -	echo "# Column 2: X_POW2  [arbitrary, f32] The horizontal axis to the power of two." \ -	     >> $@.tmp -	echo "# " >> $@.tmp -	$(call print-copyright, $@.tmp) - -        # Generate the table of random values. -	awk 'BEGIN {for(i=1;i<=$(delete-me-squared-num);i+=0.5) \ -	              printf("%-8.1f%.2f\n", i, i*i); }' >> $@.tmp - -        # Write it into the final target -	mv $@.tmp $@ - - - - - -# WFPC2 image PDF -# ----------------- -# -# For an example image, we'll make a PDF copy of the WFPC II image to -# display in the paper. -dm-histdir = $(texdir)/image-histogram -$(dm-histdir): | $(texdir); mkdir $@ -dm-img-pdf = $(dm-histdir)/wfpc2.pdf -$(dm-img-pdf): $(dm-histdir)/%.pdf: $(indir)/%.fits | $(dm-histdir) - -        # When the plotted values are re-made, it is necessary to also -        # delete the TiKZ externalized files so the plot is also re-made. -	rm -f $(tikzdir)/delete-me-image-histogram.pdf - -        # Convert the dataset to a PDF. -	astconvertt --colormap=gray --fluxhigh=4 $< -h0 -o$@ - - - - - -# Histogram of WFPC2 image -# ------------------------ -# -# For an example plot, we'll show the pixel value histogram also. IMPORTANT -# NOTE: because this histogram contains data that is included in a plot, we -# should publish it, so it will go into the $(tex-publish-dir). -dm-img-histogram = $(tex-publish-dir)/wfpc2-histogram.txt -$(dm-img-histogram): $(tex-publish-dir)/%-histogram.txt: $(indir)/%.fits \ -                     | $(tex-publish-dir) - -        # When the plotted values are re-made, it is necessary to also -        # delete the TiKZ externalized files so the plot is also re-made. -	rm -f $(tikzdir)/delete-me-image-histogram.pdf - -        # Generate the pixel value histogram. -	aststatistics --lessthan=5 $< -h0 --histogram -o$@.data - -        # Put a two-line description of the dataset, copy the column -        # metadata from '$@.data', and add copyright. -	echo "# Histogram of example image to demonstrate Maneage (MANaging data linEAGE)." \ -	     > $@.tmp -	echo "# Example image URL: $(WFPC2URL)/$(WFPC2IMAGE)" >> $@.tmp -	echo "# " >> $@.tmp -	awk '/^# Column .:/' $@.data >> $@.tmp -	echo "# " >> $@.tmp -	$(call print-copyright, $@.tmp) - -        # Add the column numbers in a formatted manner, rename it to the -        # output and clean up. -	awk '!/^#/{printf("%-15.4f%d\n", $$1, $$2)}' $@.data >> $@.tmp -	mv $@.tmp $@ -	rm $@.data - - - - - -# Basic statistics -# ---------------- -# -# This is just as a demonstration on how to get analysic configuration -# parameters from variables defined in `reproduce/analysis/config/'. -dm-img-stats = $(dm-histdir)/wfpc2-stats.txt -$(dm-img-stats): $(dm-histdir)/%-stats.txt: $(indir)/%.fits \ -                 | $(dm-histdir) -	aststatistics $< -h0 --mean --median > $@ - - - - - -# TeX macros -# ---------- -# -# This is how we write the necessary parameters in the final PDF. -# -# NOTE: In LaTeX you cannot use any non-alphabetic character in a variable -# name. -$(mtexdir)/delete-me.tex: $(dm-squared) $(dm-img-pdf) $(dm-img-histogram) \ -                          $(dm-img-stats) - -        # Write the number of random values used. -	echo "\newcommand{\deletemenum}{$(delete-me-squared-num)}" > $@ - -        # Note that since Make variables start with a `$(', if you want to -        # use `$' within the shell (not Make), you have to quote any -        # occurance of `$' with another `$'. That is why there are `$$' in -        # the AWK command below. -        # -        # Here, we are first using AWK to find the minimum and maximum -        # values, then using it again to read each separately to use in the -        # macro definition. -	mm=$$(awk 'BEGIN{min=99999; max=-min} -	           !/^#/{if($$2>max) max=$$2; if($$2<min) min=$$2;} -	           END{print min, max}' $(dm-squared)); -	v=$$(echo "$$mm" | awk '{printf "%.3f", $$1}'); -	echo "\newcommand{\deletememin}{$$v}"             >> $@ -	v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}'); -	echo "\newcommand{\deletememax}{$$v}"             >> $@ - -        # Write the statistics of the WFPC2 image as a macro. -	mean=$$(awk     '{printf("%.2f", $$1)}' $(dm-img-stats)) -	echo "\newcommand{\deletemewfpctwomean}{$$mean}"          >> $@ -	median=$$(awk   '{printf("%.2f", $$2)}' $(dm-img-stats)) -	echo "\newcommand{\deletemewfpctwomedian}{$$median}"      >> $@ diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk new file mode 100644 index 0000000..5ddb3d7 --- /dev/null +++ b/reproduce/analysis/make/demo-plot.mk @@ -0,0 +1,88 @@ +# Second step of analysis: +#    Data for plot of number/fraction of tools per year. +# +# Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General +# Public License for more details. See <http://www.gnu.org/licenses/>. + + + + +# Directory to host outputs +# ------------------------- +a2dir = $(texdir)/to-publish +$(a2dir):; mkdir $@ + + + + + +# Table for Figure 1C of Menke+20 +# ------------------------------- +a2mk20f1c = $(a2dir)/tools-per-year.txt +$(a2mk20f1c): $(mk20tab3) | $(a2dir) + +        # Remove the (possibly) produced figure that is created from this +        # table: it is created by LaTeX's TiKZ package, and includes +        # multiple files with a fixed prefix. +	rm -f $(tikzdir)/figure-tools-per-year* + +        # Write the column metadata in a temporary file name (appending +        # '.tmp' to the actual target name). Once all steps are done, it is +        # renamed to the final target. We do this because if there is an +        # error in the middle, Make will not consider the job to be +        # complete and will stop here. +	echo "# Data of plot showing fraction of papers that mentioned software tools" > $@.tmp +	echo "# per year to demonstrate the features of Maneage (MANaging data linEAGE)." >> $@.tmp +	     >> $@.tmp +	echo "# Raw data taken from Menke+2020 (https://doi.org/10.1101/2020.01.15.908111)." \ +	     >> $@.tmp +	echo "# " >> $@.tmp +	echo "# Column 1: YEAR       [count, u16] Publication year of papers." \ +	     >> $@.tmp +	echo "# Column 2: WITH_TOOLS [frac,  f32] Fraction of papers mentioning software tools." \ +	     >> $@.tmp +	echo "# Column 3: NUM_PAPERS [count, u32] Total number of papers studied in that year." \ +	     >> $@.tmp +	echo "# " >> $@.tmp +	$(call print-copyright, $@.tmp) + + +        # Find the maximum number of papers. +	awk '!/^#/{all[$$1]+=$$2; id[$$1]+=$$3} \ +	     END{ for(year in all) \ +	            printf("%-7d%-10.3f%d\n", year, 100*id[year]/all[year], \ +	                   all[year]) \ +	        }' $< \ +	    >> $@.tmp + +        # Write it into the final target +	mv $@.tmp $@ + + + + + +# Final LaTeX macro +$(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf + +        # Find the first year (first column of first row) of data. +	v=$$(awk '!/^#/ && c==0{c++; print $$1}' $(a2mk20f1c)) +	echo "\newcommand{\menkefirstyear}{$$v}" > $@ + +        # Find the number of rows in the plotted table. +	v=$$(awk '!/^#/{c++} END{print c}' $(a2mk20f1c)) +	echo "\newcommand{\menkenumyears}{$$v}" >> $@ + +        # Find the number of papers in 1996. +	v=$$(awk '$$1==$(menke-demo-year){print $$3}' $(a2mk20f1c)) +	echo "\newcommand{\menkenumpapersdemocount}{$$v}" >> $@ +	echo "\newcommand{\menkenumpapersdemoyear}{$(menke-demo-year)}" >> $@ diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk index 71ee7d3..8d9c164 100644 --- a/reproduce/analysis/make/download.mk +++ b/reproduce/analysis/make/download.mk @@ -53,12 +53,12 @@  # progress at every moment.  $(indir):; mkdir $@  downloadwrapper = $(bashdir)/download-multi-try -inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits) -$(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) +inputdatasets = $(indir)/menke20.xlsx +$(inputdatasets): $(indir)/%: | $(indir) $(lockdir)          # Set the necessary parameters for this input file. -	if   [ $* = wfpc2 ]; then -	  origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); +	if   [ $* = menke20.xlsx ]; then +	  origname=$(MK20DATA); fullurl=$(MK20URL); mdf=$(MK20MD5);  	else  	echo; echo; echo "Not recognized input dataset: '$*.fits'."  	echo; echo; exit 1 @@ -76,7 +76,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)  	else  	  touch $(lockdir)/download  	  $(downloadwrapper) "wget --no-use-server-timestamps -O" \ -	                     $(lockdir)/download $$url/$$origname $@ +	                     $(lockdir)/download $$fullurl $@  	fi          # Check the md5 sum to see if this is the proper dataset. @@ -97,5 +97,8 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)  #  # It is very important to mention the address where the data were  # downloaded in the final report. -$(mtexdir)/download.tex: $(pconfdir)/INPUTS.conf | $(mtexdir) -	echo "\\newcommand{\\wfpctwourl}{$(WFPC2URL)}" > $@ +$(mtexdir)/download.tex: $(indir)/menke20.xlsx | $(mtexdir) +	echo "\newcommand{\menketwentyxlsxname}{$(MK20DATA)}"  > $@ +	echo "\newcommand{\menketwentychecksum}{$(MK20MD5)}"  >> $@ +	echo "\newcommand{\menketwentybytesize}{$(MK20SIZE)}" >> $@ +	echo "\newcommand{\menketwentyurl}{$(MK20URL)}"       >> $@ diff --git a/reproduce/analysis/make/format.mk b/reproduce/analysis/make/format.mk new file mode 100644 index 0000000..3070e6a --- /dev/null +++ b/reproduce/analysis/make/format.mk @@ -0,0 +1,86 @@ +# First step of analysis: +#    Prepare the data, return basic values. +# +# As a demonstration analysis to go with the paper, we use the data from +# Menke 2020 (DOI:10.1101/2020.01.15.908111). This is a relevant paper +# because it provides interesting statistics about tools and methods used +# in scientific papers. +# +# Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General +# Public License for more details. See <http://www.gnu.org/licenses/>. + + + + +# Save the "Table 3" spreadsheet from the downloaded `.xlsx' file into a +# simple plain-text file that is easy to use. +a1dir = $(BDIR)/analysis1 +mk20tab3 = $(a1dir)/table-3.txt +$(a1dir):; mkdir $@ +$(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) + +        # Set a base-name for the table-3 data. +	base=$(basename $(notdir $<))-table-3 + +        # Unfortunately XLSX I/O only works when the input and output are +        # in the directory it is running. So first, we need to switch to +        # the input directory, run it, then put our desired output where we +        # want and delete the extra files. +	topdir=$$(pwd) +	cd $(indir) +	xlsxio_xlsx2csv $(notdir $<) +	cp $(notdir $<)."Table 3 All by journal by year".csv $$base.csv +	rm $(notdir $<).*.csv +	cd $$topdir + +        # Read the necessary information. Note that we are dealing with a +        # CSV (comma-separated value) file. But when there are commas in a +        # string, quotation signs are put around it. The `FPAT' values is +        # fully described in the GNU AWK manual. In short, it ensures that +        # if there is a comma in the middle of double-quotes, it doesn't +        # count as a delimter. +	echo "# Column 1: YEAR [counter, i16] Year of journal's publication." > $@.tmp +	echo "# Column 2: NUM_PAPERS [counter, i16] Number of studied papers in that journal." >> $@.tmp +	echo "# Column 3: NUM_PAPERS_WITH_TOOLS [counter, i16] Number of papers with an identified tool." >> $@.tmp +	echo "# Column 4: NUM_ID_TOOLS [counter, i16] Number of software/tools that were identified." >> $@.tmp +	echo "# Column 5: JOURNAL_NAME [string, str150] Name of journal." >> $@.tmp +	awk 'NR>1{printf("%-10d%-10d%-10d%-10d %s\n", $$2, $$3, $$3*$$NF, $$(NF-1), $$1)}' \ +	    FPAT='([^,]+)|("[^"]+")' $(indir)/$$base.csv >> $@.tmp + +        # Set the temporary file as the final target. This was done so if +        # there is any possible crash in the steps above, this rule is +        # re-run (its final target isn't rebuilt). +	mv $@.tmp $@ + + + + + +# Main LaTeX macro file +$(mtexdir)/format.tex: $(mk20tab3) + +        # Count the total number of papers in their study. +	v=$$(awk '!/^#/{c+=$$2} END{print c}' $(mk20tab3)) +	echo "\newcommand{\menkenumpapers}{$$v}" > $@ + +        # Count how many unique journals there were in the study. Note that +        # the `31' comes because we put 10 characters for each numeric +        # column and separated the last numeric column from the string +        # column with a space. If the number of numeric columns change in +        # the future, the `31' also has to change. +	v=$$(awk 'BEGIN{FIELDWIDTHS="41 10000"} !/^#/{print $$2}' \ +	         $(mk20tab3) | uniq | wc -l) +	echo "\newcommand{\menkenumjournals}{$$v}" >> $@ + +        # Count how many rows the original catalog has. +	v=$$(awk '!/^#/{c++} END{print c}' $(mk20tab3)) +	echo "\newcommand{\menkenumorigrows}{$$v}" >> $@ diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 19447a6..489f9e3 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -132,6 +132,7 @@ curdir   := $(shell echo $$(pwd))  # we are also going to overwrite `TEXINPUTS' just before `pdflatex'.  .ONESHELL:  .SHELLFLAGS = -ec +export TERM=xterm  export TEXINPUTS :=  export CCACHE_DISABLE := 1  export PATH := $(installdir)/bin @@ -279,10 +280,13 @@ $(project-package-contents): paper.pdf | $(texdir)          # (including the bibliography).  	m=$$dir/Makefile  	echo   "paper.pdf: paper.tex paper.bbl"                   > $$m -	printf "\tpdflatex -shell-escape -halt-on-error paper\n" >> $$m +	printf "\tlatex -shell-escape -halt-on-error paper\n"    >> $$m +	printf "\tdvips paper.dvi\n"                             >> $$m +	printf "\tps2pdf -dNOSAFER paper.ps\n"                   >> $$m  	echo   "paper.bbl: tex/src/references.tex"               >> $$m -	printf "\tpdflatex -shell-escape -halt-on-error paper\n" >> $$m -	printf "\tbiber paper\n"                                 >> $$m +	printf "\tlatex -shell-escape -halt-on-error paper\n"    >> $$m +	printf "\tbibtex paper\n"                                >> $$m +	printf "\tlatex -shell-escape -halt-on-error paper\n"    >> $$m  	echo   ".PHONY: clean"                                   >> $$m  	echo   "clean:"                                          >> $$m  	printf "\trm -f *.aux *.auxlock *.bbl *.bcf\n"           >> $$m @@ -303,7 +307,8 @@ $(project-package-contents): paper.pdf | $(texdir)          # Copy all the necessary `reproduce' and `tex' contents.  	shopt -s extglob  	cp -r tex/src                            $$dir/tex/src -	cp tex/tikz/*.pdf                        $$dir/tex/tikz +	cp -r tex/img                            $$dir/tex/img +	cp tex/tikz/*.eps                        $$dir/tex/tikz  	cp -r reproduce/*                        $$dir/reproduce  	cp -r tex/build/!($(project-package-name)) $$dir/tex/build @@ -331,8 +336,8 @@ $(project-package-contents): paper.pdf | $(texdir)          # that may cause problems on the arXiv server.  	cp tex/build/build/paper.bbl $$dir/  	tltopdir=.local/texlive/maneage/texmf-dist/tex/latex -	find $$tltopdir/biblatex/ -maxdepth 1 -type f -print0 \ -	     | xargs -0 cp -t $$dir +	#find $$tltopdir/biblatex/ -maxdepth 1 -type f -print0 \ +	#     | xargs -0 cp -t $$dir          # Just in case the package users want to rebuild some of the          # figures (manually un-comment the `makepdf' command we commented @@ -442,7 +447,7 @@ print-copyright = \  	echo "\# Git commit (that produced this dataset): $(project-commit-hash)" >> $(1); \  	echo "\# Project's Git repository: $(metadata-git-repository)" >> $(1); \  	if [ x$(metadata-arxiv) != x ]; then \ -	  echo "\# Pre-print server: arXiv:$(metadata-arxiv)" >> $(1); fi; \ +	  echo "\# Pre-print server: https://arxiv.org/abs/$(metadata-arxiv)" >> $(1); fi; \  	if [ x$(metadata-doi-journal) != x ]; then \  	  echo "\# DOI (Journal): $(metadata-doi-journal)" >> $(1); fi; \  	if [ x$(metadata-doi-zenodo) != x ]; then \ diff --git a/reproduce/analysis/make/paper.mk b/reproduce/analysis/make/paper.mk index 226a37d..cc43117 100644 --- a/reproduce/analysis/make/paper.mk +++ b/reproduce/analysis/make/paper.mk @@ -44,7 +44,7 @@ $(mtexdir)/project.tex: $(mtexdir)/verify.tex          # If no PDF is requested, or if LaTeX isn't available, don't          # continue to building the final PDF. Otherwise, merge all the TeX          # macros into one for building the PDF. -	@if [ -f .local/bin/pdflatex ] && [ x"$(pdf-build-final)" != x ]; then +	@if [ -f .local/bin/latex ] && [ x"$(pdf-build-final)" != x ]; then            # Put a LaTeX input command for all the necessary macro files.  	  rm -f $(mtexdir)/project.tex @@ -86,7 +86,7 @@ $(mtexdir)/project.tex: $(mtexdir)/verify.tex  #  # NOTE: `$(mtexdir)/project.tex' is an order-only-prerequisite for  # `paper.bbl'. This is because we need to run LaTeX in both the `paper.bbl' -# recipe and the `paper.pdf' recipe. But if `tex/src/references.tex' hasn't +# recipe and the `paper.pdf' recipe. But if `tex/src/references.bib' hasn't  # been modified, we don't want to re-build the bibliography, only the final  # PDF.  $(texbdir)/paper.bbl: tex/src/references.tex $(mtexdir)/dependencies-bib.tex \ @@ -95,14 +95,37 @@ $(texbdir)/paper.bbl: tex/src/references.tex $(mtexdir)/dependencies-bib.tex \  	@macros=$$(cat $(mtexdir)/project.tex)  	if [ x"$$macros" != x ]; then +          # Unfortunately I can't get bibtex to look into a special +          # directory for the references, so we'll copy it here. +	  p=$$(pwd) +	  if ! [ -L $(texbdir)/references.bib ]; then +	    ln -s $$p/tex/src/references.tex $(texbdir)/references.bib +	  fi + +          # Copy the improved IEEE bst file into the build directory. +          # The improved bst file provides ArXiv clickable URLs and +          # if available, open-access URLs based on the DOIs, with +          # closed-access URLs as a fallback, via https://oadoi.org . +	  cp -pv $$p/tex/src/IEEEtran_openaccess.bst $(texbdir) +            # We'll run LaTeX first to generate the `.bcf' file (necessary            # for `biber') and then run `biber' to generate the `.bbl' file. -	  p=$$(pwd)  	  export TEXINPUTS=$$p:  	  cd $(texbdir); -	  pdflatex -shell-escape -halt-on-error $$p/paper.tex -	  biber paper - +	  latex -shell-escape -halt-on-error $$p/paper.tex +	  bibtex paper +          # Hack: tidy up eprint+doi style that didn't work in .bst file. +          # TODO (better): read Part 4 of +          # http://mirrors.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf +          # and fix the .bst style properly. +	  cp paper.bbl paper-tmp.bbl \ +	    && sed -e "s/\'/EOLINE/g" paper-tmp.bbl \ +	    | tr -d '\n' \ +	    | sed -e 's/\([0-9]\)\( \|EOLINE\)}/\1}/g' \ +	    | sed -e 's/\([^,]\) *\( \|EOLINE\) *\\eprint/\1, \\eprint/g' \ +	    | sed -e 's/\([^,]\) *\( \|EOLINE\) *\\doi/\1, \\doi/g' \ +	    | sed -e 's/EOLINE/\n/g' > paper.bbl +	  latex -shell-escape -halt-on-error $$p/paper.tex  	fi @@ -127,11 +150,17 @@ paper.pdf: $(mtexdir)/project.tex paper.tex $(texbdir)/paper.bbl  	  p=$$(pwd)  	  export TEXINPUTS=$$p:  	  cd $(texbdir) -	  pdflatex -shell-escape -halt-on-error $$p/paper.tex +	  latex -shell-escape -halt-on-error $$p/paper.tex + +          # Convert the DVI to PostScript, and the PostScript to PDF. The +          # `-dNOSAFER' option to GhostScript allows transparencies in the +          # conversion from PostScript to PDF, see +          # https://www.ghostscript.com/doc/current/Language.htm#Transparency +	  dvips paper.dvi +	  ps2pdf -dNOSAFER paper.ps            # Come back to the top project directory and copy the built PDF            # file here.  	  cd $$p  	  cp $(texbdir)/$@ $(final-paper) -  	fi diff --git a/reproduce/analysis/make/top-make.mk b/reproduce/analysis/make/top-make.mk index 9a26f22..55b1f43 100644 --- a/reproduce/analysis/make/top-make.mk +++ b/reproduce/analysis/make/top-make.mk @@ -62,7 +62,7 @@ include reproduce/software/config/LOCAL.conf  # -----------------------------  #  # If you are just interested in the processing and don't want to build the -# PDF, you can skip the creatation of the final PDF by removing the value +# PDF, you can skip the creation of the final PDF by removing the value  # of `pdf-build-final' in `reproduce/analysis/config/pdf-build.conf'.  ifeq (x$(reproducible_paper_group_name),x$(GROUP-NAME))  all: paper.pdf @@ -112,7 +112,8 @@ endif  # wild-card like the configuration Makefiles).  makesrc = initialize \            download \ -          delete-me \ +          format \ +          demo-plot \            verify \            paper diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 67b3fea..dd224d6 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -127,16 +127,15 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex)  	  rm -f $@.tmp            # Verify the figure datasets. -	  $(call verify-txt-no-comments-no-space, \ -	         $(dm-squared), 6b6d3b0f9c351de53606507b59bca5d1, $@.tmp) -	  $(call verify-txt-no-comments-no-space, \ -	         $(dm-img-histogram), b1f9c413f915a1ad96078fee8767b16c, $@.tmp) +	  $(call verify-txt-no-comments-leading-space, \ +	         $(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69)            # Verify TeX macros (the values that go into the PDF text).  	  for m in $(verify-check); do  	    file=$(mtexdir)/$$m.tex -	    if   [ $$m == download  ]; then s=6749e17ce606d57d30cebdbc1a5d23ad -	    elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 +	    if   [ $$m == download  ]; then s=64da83ee3bfaa236849927cdc001f5d3 +	    elif [ $$m == format    ]; then s=e04d95a539b5540c940bf48994d8d45f +	    elif [ $$m == demo-plot ]; then s=48bffe6cf8db790c63a33302d20db77f  	    else echo; echo "'$$m' not recognized."; exit 1  	    fi  	    $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp) diff --git a/reproduce/software/config/TARGETS.conf b/reproduce/software/config/TARGETS.conf index 8d3bdb0..50392fb 100644 --- a/reproduce/software/config/TARGETS.conf +++ b/reproduce/software/config/TARGETS.conf @@ -36,7 +36,10 @@  # Programs and libraries. -top-level-programs  = gnuastro +# +# Ghostscript: to build PDF paper (in particular the `ps2pdf' command). +# XLSXI/O: to read and write XLSX files. +top-level-programs = ghostscript xlsxio  # Python libraries/modules. -top-level-python    = +top-level-python = diff --git a/reproduce/software/config/texlive-packages.conf b/reproduce/software/config/texlive-packages.conf index c53e170..d88a0bf 100644 --- a/reproduce/software/config/texlive-packages.conf +++ b/reproduce/software/config/texlive-packages.conf @@ -16,9 +16,6 @@  # the basic installation scheme that we used to install tlmgr, they will be  # ignored in the `tlmgr install' command, but will be used later when we  # want their versions. -texlive-packages = tex fancyhdr ec newtx fontaxes xkeyval etoolbox xcolor \ -                   setspace caption footmisc datetime fmtcount titlesec \ -                   preprint ulem biblatex biber logreq pgf pgfplots fp \ -                   courier tex-gyre txfonts times csquotes kastrup \ -                   trimspaces pdftexcmds pdfescape letltxmacro bitset \ -                   mweights +texlive-typewriter-pkgs = courier inconsolata xkeyval upquote +texlive-packages = times IEEEtran cite xcolor pgfplots ps2eps \ +                   listings ulem etoolbox $(texlive-typewriter-pkgs) diff --git a/reproduce/software/shell/configure.sh b/reproduce/software/shell/configure.sh index 789ddd5..3b3c38f 100755 --- a/reproduce/software/shell/configure.sh +++ b/reproduce/software/shell/configure.sh @@ -778,10 +778,10 @@ if [ x"$input_dir" = x ]; then  else      indir=$input_dir  fi -wfpc2name=$(awk '!/^#/ && $1=="WFPC2IMAGE" {print $3}' $adir/INPUTS.conf) -wfpc2md5=$(awk  '!/^#/ && $1=="WFPC2MD5"   {print $3}' $adir/INPUTS.conf) -wfpc2size=$(awk '!/^#/ && $1=="WFPC2SIZE"  {print $3}' $adir/INPUTS.conf) -wfpc2url=$(awk  '!/^#/ && $1=="WFPC2URL"   {print $3}' $adir/INPUTS.conf) +mk20name=$(awk '!/^#/ && $1=="MK20DATA" {print $3}' $adir/INPUTS.conf) +mk20md5=$(awk  '!/^#/ && $1=="MK20MD5"  {print $3}' $adir/INPUTS.conf) +mk20size=$(awk '!/^#/ && $1=="MK20SIZE" {print $3}' $adir/INPUTS.conf) +mk20url=$(awk  '!/^#/ && $1=="MK20URL"  {print $3}' $adir/INPUTS.conf)  if [ $rewritepconfig = yes ] && [ x"$input_dir" = x ]; then      cat <<EOF @@ -794,10 +794,9 @@ please specify the directory hosting them on this system. If you don't,  they will be downloaded automatically. Each file is shown with its total  volume and its 128-bit MD5 checksum in parenthesis. -  $wfpc2name ($wfpc2size, $wfpc2md5): -    A 100x100 Hubble Space Telescope WFPC II image used in the FITS -    standard webpage as a demonstration of this file format. -    URL: $wfpc2url/$wfpc2name +  $mk20name ($mk20size, $mk20md5): +    Supplementary dataset to http://dx.doi.org/10.1101/2020.01.15.908111 +    URL: $mk20url  NOTE I: This directory, or the datasets above, are optional. If it doesn't  exist, the files will be downloaded in the build directory and used. @@ -1045,6 +1044,19 @@ if ! [ -d $texbdir ]; then mkdir $texbdir; fi  tikzdir=$texbdir/tikz  if ! [ -d $tikzdir ]; then mkdir $tikzdir; fi +# If 'tex/build' and 'tex/tikz' aren't symbolic links, then we are in the +# tarball (not the Git repository), so we'll give them another name and let +# the script continue normally. +if rm -f tex/build; then +    rm -f tex/tikz +else +    mv tex/tikz tex/tikz-from-tarball +    mv tex/build tex/build-from-tarball +fi + + + +  # If 'tex/build' and 'tex/tikz' are symbolic links then 'rm -f' will delete  # them and we can continue. However, when the project is being built from | 
