diff options
Diffstat (limited to 'reproduce')
-rw-r--r-- | reproduce/analysis/config/INPUTS.conf | 8 | ||||
-rw-r--r-- | reproduce/analysis/config/delete-me-num.conf | 2 | ||||
-rw-r--r-- | reproduce/analysis/config/menke-demo-year.conf | 3 | ||||
-rw-r--r-- | reproduce/analysis/config/verify-outputs.conf | 2 | ||||
-rw-r--r-- | reproduce/analysis/make/delete-me.mk | 129 | ||||
-rw-r--r-- | reproduce/analysis/make/demo-plot.mk | 59 | ||||
-rw-r--r-- | reproduce/analysis/make/download.mk | 14 | ||||
-rw-r--r-- | reproduce/analysis/make/format.mk | 82 | ||||
-rw-r--r-- | reproduce/analysis/make/initialize.mk | 1 | ||||
-rw-r--r-- | reproduce/analysis/make/paper.mk | 6 | ||||
-rw-r--r-- | reproduce/analysis/make/top-make.mk | 3 | ||||
-rw-r--r-- | reproduce/analysis/make/verify.mk | 3 | ||||
-rw-r--r-- | reproduce/software/config/installation/TARGETS.conf | 4 | ||||
-rw-r--r-- | reproduce/software/config/installation/texlive-packages.conf | 5 | ||||
-rw-r--r-- | reproduce/software/make/high-level.mk | 3 | ||||
-rwxr-xr-x | reproduce/software/shell/configure.sh | 15 |
16 files changed, 178 insertions, 161 deletions
diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf index 6ddaec7..b1cf546 100644 --- a/reproduce/analysis/config/INPUTS.conf +++ b/reproduce/analysis/config/INPUTS.conf @@ -9,7 +9,7 @@ # this notice are preserved. This file is offered as-is, without any # warranty. -WFPC2IMAGE = WFPC2ASSNu5780205bx.fits -WFPC2MD5 = a4791e42cd1045892f9c41f11b50bad8 -WFPC2SIZE = 62kb -WFPC2URL = https://fits.gsfc.nasa.gov/samples +MK20DATA = menke20.xlsx +MK20MD5 = 8e4eee64791f351fec58680126d558a0 +MK20SIZE = 1.9MB +MK20URL = https://www.biorxiv.org/content/biorxiv/early/2020/01/18/2020.01.15.908111/DC1/embed/media-1.xlsx diff --git a/reproduce/analysis/config/delete-me-num.conf b/reproduce/analysis/config/delete-me-num.conf deleted file mode 100644 index 17f608c..0000000 --- a/reproduce/analysis/config/delete-me-num.conf +++ /dev/null @@ -1,2 +0,0 @@ -# Number of samples to create -delete-me-num = 50 diff --git a/reproduce/analysis/config/menke-demo-year.conf b/reproduce/analysis/config/menke-demo-year.conf new file mode 100644 index 0000000..429b220 --- /dev/null +++ b/reproduce/analysis/config/menke-demo-year.conf @@ -0,0 +1,3 @@ +# This is the demonstration year showing the number of papers studied +# before 1997. +menke-demo-year = 1996 diff --git a/reproduce/analysis/config/verify-outputs.conf b/reproduce/analysis/config/verify-outputs.conf index 4f99661..0542c53 100644 --- a/reproduce/analysis/config/verify-outputs.conf +++ b/reproduce/analysis/config/verify-outputs.conf @@ -1,2 +1,2 @@ # To disable verification of output datasets set this variable to yes -verify-outputs = yes +verify-outputs = diff --git a/reproduce/analysis/make/delete-me.mk b/reproduce/analysis/make/delete-me.mk deleted file mode 100644 index 8b97673..0000000 --- a/reproduce/analysis/make/delete-me.mk +++ /dev/null @@ -1,129 +0,0 @@ -# Dummy Makefile to create a random dataset for plotting. -# -# Copyright (C) 2018-2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> -# -# This Makefile is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This Makefile is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# A copy of the GNU General Public License is available at -# <http://www.gnu.org/licenses/>. - - - - - -# Dummy dataset -# ------------- -# -# We will use AWK to generate a table showing X and X^2 and draw its plot. -delete-numdir = $(texdir)/delete-me-num -delete-num = $(delete-numdir)/data.txt -$(delete-numdir): | $(texdir); mkdir $@ -$(delete-num): $(pconfdir)/delete-me-num.conf | $(delete-numdir) - - # When the plotted values are re-made, it is necessary to also - # delete the TiKZ externalized files so the plot is also re-made. - rm -f $(tikzdir)/delete-me.pdf - - # Generate the table of random values. - awk 'BEGIN {for(i=1;i<=$(delete-me-num);i+=0.5) print i, i*i; }' > $@ - - - - - -# WFPC2 image PDF -# ----------------- -# -# For an example image, we'll make a PDF copy of the WFPC II image to -# display in the paper. -delete-demodir = $(texdir)/delete-me-demo -$(delete-demodir): | $(texdir); mkdir $@ -delete-pdf = $(delete-demodir)/wfpc2.pdf -$(delete-pdf): $(delete-demodir)/%.pdf: $(indir)/%.fits | $(delete-demodir) - - # When the plotted values are re-made, it is necessary to also - # delete the TiKZ externalized files so the plot is also re-made. - rm -f $(tikzdir)/delete-me-wfpc2.pdf - - # Convert the dataset to a PDF. - astconvertt --colormap=gray --fluxhigh=4 $< -h0 -o$@ - - - - - -# Histogram of WFPC2 image -# ------------------------ -# -# For an example plot, we'll show the pixel value histogram also. -delete-histogram = $(delete-demodir)/wfpc2-hist.txt -$(delete-histogram): $(delete-demodir)/%-hist.txt: $(indir)/%.fits \ - | $(delete-demodir) - - # When the plotted values are re-made, it is necessary to also - # delete the TiKZ externalized files so the plot is also re-made. - rm -f $(tikzdir)/delete-me-wfpc2.pdf - - # Generate the pixel value distribution - aststatistics --lessthan=5 $< -h0 --histogram -o$@ - - - - - -# Basic statistics -# ---------------- -# -# This is just as a demonstration on how to get analysic configuration -# parameters from variables defined in `reproduce/analysis/config/'. -delete-stats = $(delete-demodir)/wfpc2-stats.txt -$(delete-stats): $(delete-demodir)/%-stats.txt: $(indir)/%.fits \ - | $(delete-demodir) - aststatistics $< -h0 --mean --median > $@ - - - - - -# TeX macros -# ---------- -# -# This is how we write the necessary parameters in the final PDF. -# -# NOTE: In LaTeX you cannot use any non-alphabetic character in a variable -# name. -$(mtexdir)/delete-me.tex: $(delete-num) $(delete-pdf) $(delete-histogram) \ - $(delete-stats) - - # Write the number of random values used. - echo "\newcommand{\deletemenum}{$(delete-me-num)}" > $@ - - # Note that since Make variables start with a `$(', if you want to - # use `$' within the shell (not Make), you have to quote any - # occurance of `$' with another `$'. That is why there are `$$' in - # the AWK command below. - # - # Here, we are first using AWK to find the minimum and maximum - # values, then using it again to read each separately to use in the - # macro definition. - mm=$$(awk 'BEGIN{min=99999; max=-min} - !/^#/{if($$2>max) max=$$2; if($$2<min) min=$$2;} - END{print min, max}' $(delete-num)); - v=$$(echo "$$mm" | awk '{printf "%.3f", $$1}'); - echo "\newcommand{\deletememin}{$$v}" >> $@ - v=$$(echo "$$mm" | awk '{printf "%.3f", $$2}'); - echo "\newcommand{\deletememax}{$$v}" >> $@ - - # Write the statistics of the WFPC2 image as a macro. - mean=$$(awk '{printf("%.2f", $$1)}' $(delete-stats)) - echo "\newcommand{\deletemewfpctwomean}{$$mean}" >> $@ - median=$$(awk '{printf("%.2f", $$2)}' $(delete-stats)) - echo "\newcommand{\deletemewfpctwomedian}{$$median}" >> $@ diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk new file mode 100644 index 0000000..caf77af --- /dev/null +++ b/reproduce/analysis/make/demo-plot.mk @@ -0,0 +1,59 @@ +# Second step of analysis: +# Data for plot of number/fraction of tools per year. +# +# Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. See <http://www.gnu.org/licenses/>. + + + + +# Directory to host outputs +# ------------------------- +a2dir = $(texdir)/tools-per-year +$(a2dir):; mkdir $@ + + + + + +# Table for Figure 1C of Menke+20 +# ------------------------------- +a2mk20f1c = $(a2dir)/tools-per-year.txt +$(a2mk20f1c): $(mk20tab3) | $(a2dir) + + # Remove the (possibly) produced figure that is created from this + # table: it is created by LaTeX's TiKZ package, and includes + # multiple files with a fixed prefix. + rm -f $(tikzdir)/figure-tools-per-year* + + # Find the maximum number of papers. + awk '!/^#/{all[$$1]+=$$2; id[$$1]+=$$3} \ + END{ for(year in all) \ + print year, 100*id[year]/all[year], all[year] \ + }' $< \ + > $@ + + + + + +# Final LaTeX macro +$(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/menke-demo-year.conf + + # Find the first year (first column of first row) of data. + v=$$(awk 'NR==1{print $$1}' $(a2mk20f1c)) + echo "\newcommand{\menkefirstyear}{$$v}" > $@ + + # Find the number of papers in 1996. + v=$$(awk '$$1==$(menke-demo-year){print $$3}' $(a2mk20f1c)) + echo "\newcommand{\menkenumpapersdemocount}{$$v}" >> $@ + echo "\newcommand{\menkenumpapersdemoyear}{$(menke-demo-year)}" >> $@ diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk index 07e9f27..cf5bfa4 100644 --- a/reproduce/analysis/make/download.mk +++ b/reproduce/analysis/make/download.mk @@ -50,12 +50,12 @@ # progress at every moment. $(indir):; mkdir $@ downloadwrapper = $(bashdir)/download-multi-try -inputdatasets = $(foreach i, wfpc2, $(indir)/$(i).fits) -$(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) +inputdatasets = $(indir)/menke20.xlsx +$(inputdatasets): $(indir)/%: | $(indir) $(lockdir) # Set the necessary parameters for this input file. - if [ $* = wfpc2 ]; then - origname=$(WFPC2IMAGE); url=$(WFPC2URL); mdf=$(WFPC2MD5); + if [ $* = menke20.xlsx ]; then + origname=$(MK20DATA); fullurl=$(MK20URL); mdf=$(MK20MD5); else echo; echo; echo "Not recognized input dataset: '$*.fits'." echo; echo; exit 1 @@ -73,7 +73,7 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) else touch $(lockdir)/download $(downloadwrapper) "wget --no-use-server-timestamps -O" \ - $(lockdir)/download $$url/$$origname $@ + $(lockdir)/download $$fullurl $@ fi # Check the md5 sum to see if this is the proper dataset. @@ -94,5 +94,5 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir) # # It is very important to mention the address where the data were # downloaded in the final report. -$(mtexdir)/download.tex: $(pconfdir)/INPUTS.conf | $(mtexdir) - echo "\\newcommand{\\wfpctwourl}{$(WFPC2URL)}" > $@ +$(mtexdir)/download.tex: $(indir)/menke20.xlsx | $(mtexdir) + echo "\newcommand{\menketwentyurl}{$(MK20URL)}" > $@ diff --git a/reproduce/analysis/make/format.mk b/reproduce/analysis/make/format.mk new file mode 100644 index 0000000..868c411 --- /dev/null +++ b/reproduce/analysis/make/format.mk @@ -0,0 +1,82 @@ +# First step of analysis: +# Prepare the data, return basic values. +# +# As a demonstration analysis to go with the paper, we use the data from +# Menke 2020 (DOI:10.1101/2020.01.15.908111). This is a relevant paper +# because it provides interesting statistics about tools and methods used +# in scientific papers. +# +# Copyright (C) 2020 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# +# This Makefile is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This Makefile is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. See <http://www.gnu.org/licenses/>. + + + + +# Save the "Table 3" spreadsheet from the downloaded `.xlsx' file into a +# simple plain-text file that is easy to use. +a1dir = $(BDIR)/analysis1 +mk20tab3 = $(a1dir)/menke20-table-3.txt +$(a1dir):; mkdir $@ +$(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) + + # Set a base-name for the table-3 data. + base=$(basename $(notdir $<))-table-3 + + # Unfortunately XLSX I/O only works when the input and output are + # in the directory it is running. So first, we need to switch to + # the input directory, run it, then put our desired output where we + # want and delete the extra files. + topdir=$$(pwd) + cd $(indir) + xlsxio_xlsx2csv $(notdir $<) + cp $(notdir $<)."Table 3 All by journal by year".csv $$base.csv + rm $(notdir $<).*.csv + cd $$topdir + + # Read the necessary information. Note that we are dealing with a + # CSV (comma-separated value) file. But when there are commas in a + # string, quotation signs are put around it. The `FPAT' values is + # fully described in the GNU AWK manual. In short, it ensures that + # if there is a comma in the middle of double-quotes, it doesn't + # count as a delimter. + echo "# Column 1: YEAR [counter, i16] Year of journal's publication." > $@.tmp + echo "# Column 2: NUM_PAPERS [counter, i16] Number of studied papers in that journal." >> $@.tmp + echo "# Column 3: NUM_PAPERS_WITH_TOOLS [counter, i16] Number of papers with an identified tool." >> $@.tmp + echo "# Column 4: NUM_ID_TOOLS [counter, i16] Number of software/tools that were identified." >> $@.tmp + echo "# Column 5: JOURNAL_NAME [string, str150] Name of journal." >> $@.tmp + awk 'NR>1{printf("%-10d%-10d%-10d%-10d %s\n", $$2, $$3, $$3*$$NF, $$(NF-1), $$1)}' \ + FPAT='([^,]+)|("[^"]+")' $(indir)/$$base.csv >> $@.tmp + + # Set the temporary file as the final target. This was done so if + # there is any possible crash in the steps above, this rule is + # re-run (its final target isn't rebuilt). + mv $@.tmp $@ + + + + + +# Main LaTeX macro file +$(mtexdir)/format.tex: $(mk20tab3) + + # Count the total number of papers in their study. + v=$$(awk '!/^#/{c+=$$2} END{print c}' $(mk20tab3)) + echo "\newcommand{\menkenumpapers}{$$v}" > $@ + + # Count how many unique journals there were in the study. Note that + # the `31' comes because we put 10 characters for each numeric + # column and separated the last numeric column from the string + # column with a space. If the number of numeric columns change in + # the future, the `31' also has to change. + v=$$(awk 'BEGIN{FIELDWIDTHS="41 10000"} !/^#/{print $$2}' \ + $(mk20tab3) | uniq | wc -l) + echo "\newcommand{\menkenumjournals}{$$v}" >> $@ diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index 79f9266..ce4e488 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -132,6 +132,7 @@ curdir := $(shell echo $$(pwd)) # we are also going to overwrite `TEXINPUTS' just before `pdflatex'. .ONESHELL: .SHELLFLAGS = -ec +export TERM=xterm export TEXINPUTS := export CCACHE_DISABLE := 1 export PATH := $(installdir)/bin diff --git a/reproduce/analysis/make/paper.mk b/reproduce/analysis/make/paper.mk index 8a14573..67db364 100644 --- a/reproduce/analysis/make/paper.mk +++ b/reproduce/analysis/make/paper.mk @@ -44,7 +44,7 @@ $(mtexdir)/project.tex: $(mtexdir)/verify.tex # If no PDF is requested, or if LaTeX isn't available, don't # continue to building the final PDF. Otherwise, merge all the TeX # macros into one for building the PDF. - @if [ -f .local/bin/pdflatex ] && [ x"$(pdf-build-final)" != x ]; then + @if [ -f .local/bin/lualatex ] && [ x"$(pdf-build-final)" != x ]; then # Put a LaTeX input command for all the necessary macro files. rm -f $(mtexdir)/project.tex @@ -100,7 +100,7 @@ $(texbdir)/paper.bbl: tex/src/references.tex $(mtexdir)/dependencies-bib.tex \ p=$$(pwd) export TEXINPUTS=$$p: cd $(texbdir); - pdflatex -shell-escape -halt-on-error $$p/paper.tex + lualatex -shell-escape -halt-on-error $$p/paper.tex biber paper fi @@ -127,7 +127,7 @@ paper.pdf: $(mtexdir)/project.tex paper.tex $(texbdir)/paper.bbl p=$$(pwd) export TEXINPUTS=$$p: cd $(texbdir) - pdflatex -shell-escape -halt-on-error $$p/paper.tex + lualatex -shell-escape -halt-on-error $$p/paper.tex # Come back to the top project directory and copy the built PDF # file here. diff --git a/reproduce/analysis/make/top-make.mk b/reproduce/analysis/make/top-make.mk index 6c940b8..30d537a 100644 --- a/reproduce/analysis/make/top-make.mk +++ b/reproduce/analysis/make/top-make.mk @@ -112,7 +112,8 @@ endif # wild-card like the configuration Makefiles). makesrc = initialize \ download \ - delete-me \ + format \ + demo-plot \ verify \ paper diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index d11dcbf..f0bcf10 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -114,8 +114,7 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) # Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do file=$(mtexdir)/$$m.tex - if [ $$m == download ]; then s=6749e17ce606d57d30cebdbc1a5d23ad - elif [ $$m == delete-me ]; then s=711e2f7fa1f16ecbeeb3df6bcb4ec705 + if [ $$m == download ]; then s=XXXXX else echo; echo "'$$m' not recognized."; exit 1 fi $(call verify-txt-no-comments-leading-space, $$file, $$s) diff --git a/reproduce/software/config/installation/TARGETS.conf b/reproduce/software/config/installation/TARGETS.conf index d2d5e2c..bb402be 100644 --- a/reproduce/software/config/installation/TARGETS.conf +++ b/reproduce/software/config/installation/TARGETS.conf @@ -36,7 +36,7 @@ # Programs and libraries. -top-level-programs = gnuastro +top-level-programs = xlsxio # Python libraries/modules. -top-level-python = astropy +top-level-python = diff --git a/reproduce/software/config/installation/texlive-packages.conf b/reproduce/software/config/installation/texlive-packages.conf index c53e170..6760eba 100644 --- a/reproduce/software/config/installation/texlive-packages.conf +++ b/reproduce/software/config/installation/texlive-packages.conf @@ -21,4 +21,7 @@ texlive-packages = tex fancyhdr ec newtx fontaxes xkeyval etoolbox xcolor \ preprint ulem biblatex biber logreq pgf pgfplots fp \ courier tex-gyre txfonts times csquotes kastrup \ trimspaces pdftexcmds pdfescape letltxmacro bitset \ - mweights + mweights \ + \ + alegreya enumitem fontspec lastpage listings environ \ + tcolorbox diff --git a/reproduce/software/make/high-level.mk b/reproduce/software/make/high-level.mk index 274bcdf..5f1d2b2 100644 --- a/reproduce/software/make/high-level.mk +++ b/reproduce/software/make/high-level.mk @@ -1157,7 +1157,8 @@ $(ibidir)/xlsxio: $(ibidir)/expat \ | $(tdir)/xlsxio-$(xlsxio-version).tar.gz export LDFLAGS="-lbz2 -lbsd"; \ $(call cbuild, xlsxio-$(xlsxio-version), static) \ - && echo "XLSX I/O $(xlsxio-version)" > $@ + && echo "XLSX I/O $(xlsxio-version)" > $ + diff --git a/reproduce/software/shell/configure.sh b/reproduce/software/shell/configure.sh index ca95a92..6e1b5fb 100755 --- a/reproduce/software/shell/configure.sh +++ b/reproduce/software/shell/configure.sh @@ -712,10 +712,10 @@ if [ x"$input_dir" = x ]; then else indir=$input_dir fi -wfpc2name=$(awk '!/^#/ && $1=="WFPC2IMAGE" {print $3}' $adir/INPUTS.conf) -wfpc2md5=$(awk '!/^#/ && $1=="WFPC2MD5" {print $3}' $adir/INPUTS.conf) -wfpc2size=$(awk '!/^#/ && $1=="WFPC2SIZE" {print $3}' $adir/INPUTS.conf) -wfpc2url=$(awk '!/^#/ && $1=="WFPC2URL" {print $3}' $adir/INPUTS.conf) +mk20name=$(awk '!/^#/ && $1=="MK20DATA" {print $3}' $adir/INPUTS.conf) +mk20md5=$(awk '!/^#/ && $1=="MK20MD5" {print $3}' $adir/INPUTS.conf) +mk20size=$(awk '!/^#/ && $1=="MK20SIZE" {print $3}' $adir/INPUTS.conf) +mk20url=$(awk '!/^#/ && $1=="MK20URL" {print $3}' $adir/INPUTS.conf) if [ $rewritepconfig = yes ] && [ x"$input_dir" = x ]; then cat <<EOF @@ -728,10 +728,9 @@ please specify the directory hosting them on this system. If you don't, they will be downloaded automatically. Each file is shown with its total volume and its 128-bit MD5 checksum in parenthesis. - $wfpc2name ($wfpc2size, $wfpc2md5): - A 100x100 Hubble Space Telescope WFPC II image used in the FITS - standard webpage as a demonstration of this file format. - URL: $wfpc2url/$wfpc2name + $mk20name ($mk20size, $mk20md5): + Supplementary dataset to http://dx.doi.org/10.1101/2020.01.15.908111 + URL: $mk20url NOTE I: This directory, or the datasets above, are optional. If it doesn't exist, the files will be downloaded in the build directory and used. |