diff options
Diffstat (limited to 'reproduce/analysis')
-rwxr-xr-x | reproduce/analysis/bash/download-multi-try | 14 | ||||
-rw-r--r-- | reproduce/analysis/config/INPUTS.conf | 113 | ||||
-rw-r--r-- | reproduce/analysis/config/metadata.conf | 2 | ||||
-rw-r--r-- | reproduce/analysis/config/pdf-build.conf | 2 | ||||
-rw-r--r-- | reproduce/analysis/config/verify-outputs.conf | 2 | ||||
-rw-r--r-- | reproduce/analysis/make/demo-plot.mk | 28 | ||||
-rw-r--r-- | reproduce/analysis/make/download.mk | 86 | ||||
-rw-r--r-- | reproduce/analysis/make/format.mk | 44 | ||||
-rw-r--r-- | reproduce/analysis/make/initialize.mk | 221 | ||||
-rw-r--r-- | reproduce/analysis/make/paper.mk | 130 | ||||
-rw-r--r-- | reproduce/analysis/make/prepare.mk | 47 | ||||
-rw-r--r-- | reproduce/analysis/make/top-make.mk | 20 | ||||
-rw-r--r-- | reproduce/analysis/make/top-prepare.mk | 20 | ||||
-rw-r--r-- | reproduce/analysis/make/verify.mk | 42 |
14 files changed, 404 insertions, 367 deletions
diff --git a/reproduce/analysis/bash/download-multi-try b/reproduce/analysis/bash/download-multi-try index 76eb859..994a8fa 100755 --- a/reproduce/analysis/bash/download-multi-try +++ b/reproduce/analysis/bash/download-multi-try @@ -6,7 +6,7 @@ # # $ /path/to/download-multi-try downloader lockfile input-url downloaded-name # -# NOTE: The `downloader' must contain the option to specify the output name +# NOTE: The 'downloader' must contain the option to specify the output name # in its end. For example "wget -O". Any other option can also be placed in # the middle. # @@ -20,13 +20,13 @@ # world, downloading is done much faster in serial, not in parallel. But # the project's processing may be done in parallel (with multiple threads # needing to download different files at the same time). Therefore, this -# script uses the `flock' program to only do one download at a time. To +# script uses the 'flock' program to only do one download at a time. To # benefit from it, any call to this script must be given the same lock # file. If your system has multiple ports to the internet, or for any -# reason, you don't want to use a lock file, set the `lockfile' name to -# `nolock'. +# reason, you don't want to use a lock file, set the 'lockfile' name to +# 'nolock'. # -# Copyright (C) 2019-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2019-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -114,9 +114,9 @@ while [ ! -f "$outname" ]; do sleep $tstep fi - # Attempt downloading the file. Note that the `downloader' ends with + # Attempt downloading the file. Note that the 'downloader' ends with # the respective option to specify the output name. For example "wget - # -O" (so `outname', that comes after it) will be the name of the + # -O" (so 'outname', that comes after it) will be the name of the # downloaded file. 
if [ x"$lockfile" = xnolock ]; then if ! $downloader $outname $inurl; then rm -f $outname; fi diff --git a/reproduce/analysis/config/INPUTS.conf b/reproduce/analysis/config/INPUTS.conf index fd8ac53..f3d1cd4 100644 --- a/reproduce/analysis/config/INPUTS.conf +++ b/reproduce/analysis/config/INPUTS.conf @@ -1,42 +1,70 @@ -# Input files necessary for this project, the variables defined in this -# file are primarily used in 'reproduce/analysis/make/download.mk'. See -# there for precise usage of the variables. But comments are also provided -# here. -# -# Necessary variables for each input dataset are listed below. Its good -# that all the variables of each file have the same base-name (in the -# example below 'DEMO') with descriptive suffixes, also put a short comment -# above each group of variables for each dataset, shortly explaining what -# it is. -# -# 1) Local file name ('DEMO-DATA' below): this is the name of the dataset -# on the local system (in 'INDIR', given at configuration time). It is -# recommended that it be the same name as the online version of the -# file like the case here (note how this variable is used in 'DEMO-URL' -# for the dataset's full URL). However, this is not always possible, so -# the local and server filenames may be different. Ultimately, the file -# name is irrelevant, we check the integrity with the checksum. -# -# 2) The MD5 checksum of the file ('DEMO-MD5' below): this is very -# important for an automatic verification of the file. You can -# calculate it by running 'md5sum' on your desired file. You can also -# use any other checksum tool that you prefer, just be sure to correct -# the respective command in 'reproduce/analysis/make/download.mk'. -# -# 3) The human-readable size of the file ('DEMO-SIZE' below): this is an -# optional variable, mainly to help a reader of your project get a -# sense of the volume they need to download if they don't already have -# the dataset. 
So it is highly recommended to add it (future readers of -# your project's source will appreciate it!). You can get it from the -# output of 'ls -lh' command on the file. Optionally you can use it in -# messages during the configuration phase (when Maneage asks for the -# input data directory), along with other info about the file(s). -# -# 4) The full dataset URL ('DEMO-URL' below): this is the full URL -# (including the file-name) that can be used to download the dataset -# when necessary. Also, see the description above on local filename. -# -# Copyright (C) 2018-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# This project's input file information (metadata). +# +# For each input (external) data file that is used within the project, +# three variables are suggested here (two of them are mandatory). These +# variables will be used by 'reproduce/analysis/make/download.mk' to import +# the dataset into the project (within the build directory): +# +# - If the file already exists locally in '$(INDIR)' (the optional input +# directory that may have been specified at configuration time with +# '--input-dir'), a symbolic link will be added in '$(indir)' (in the +# build directory). A symbolic link is used to avoid extra storage when +# files are large. +# +# - If the file doesn't exist in '$(INDIR)', or no input directory was +# specified at configuration time, then the file is downloaded from a +# specific URL. +# +# In both cases, before placing the file (or its link) in the build +# directory, 'reproduce/analysis/make/download.mk' will check the SHA256 +# checksum of the dataset and if it differs from the pre-defined value (set +# for that file, here), it will abort (since this is not the intended +# dataset). +# +# Therefore, the two variables specifying the URL and SHA256 checksum of +# the file are MANDATORY. 
The third variable (INPUT-%-size) showing the +# human-readable size of the file (from 'ls -lh') is optional (but +# recommended: because it lets future scientists get a feeling for the +# volume of data they need to input: will become important if the +# size/number of files is large). +# +# The naming convention is critical for the input files to be properly +# imported into the project. In the patterns below, the '%' is the full +# file name (including its suffix): for example in the demo input of this +# file in the 'maneage' branch, we have 'INPUT-wfpc2.fits-sha256': +# therefore, the input file (within the project's '$(indir)') is called +# 'wfpc2.fits'. This allows you to simply set '$(indir)/wfpc2.fits' as the +# pre-requisite of any recipe that needs the input file: you will rarely +# (if at all!) need to use these variables directly. +# +# INPUT-%-sha256: The sha256 checksum of the file. You can generate the +# SHA256 checksum of a file with the 'sha256sum FILENAME' +# command (where 'FILENAME' is the name of your +# file). This is very important for an automatic +# verification of the file: that it hasn't changed +# between different runs of the project (locally or in +# the URL). SHA256 is one of the more robust checksum +# algorithms (part of the 'SHA' standards). +# +# INPUT-%-url: The URL to download the file if it is not available +# locally. It can happen that during the first phases of +# your project the data aren't yet public. In this case, you +# can set a phony URL like this (just as a clear place-holder): +# 'https://this.file/is/not/yet/public'. +# +# INPUT-%-size: The human-readable size of the file (output of 'ls +# -lh'). This is not used by default but can help other +# scientists who would like to run your project get a +# good feeling of the network and storage +# capacity that is necessary to start the project.
+# +# The input dataset's name (that goes into the '%') can be different from +# the URL's file name (last component of the URL, after the last '/'). Just +# note that it is assumed that the local copy (outside of your project) is +# also called '%' (if your local copy of the input dataset and the online +# repository names are the same, be sure to set '%' accordingly). +# +# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice and @@ -48,7 +76,6 @@ # Dataset used in this analysis and its checksum for integrity checking. -MK20DATA = menke20.xlsx -MK20MD5 = 8e4eee64791f351fec58680126d558a0 -MK20SIZE = 1.9MB -MK20URL = https://www.biorxiv.org/content/biorxiv/early/2020/01/18/2020.01.15.908111/DC1/embed/media-1.xlsx +INPUT-menke20.xlsx-size = 1.9M +INPUT-menke20.xlsx-url = https://www.biorxiv.org/content/biorxiv/early/2020/01/18/2020.01.15.908111/DC1/embed/media-1.xlsx +INPUT-menke20.xlsx-sha256 = 7839cdc2946134773ffc401cbcc78fb58fc489d2caad65375c85d605b2f8b13e diff --git a/reproduce/analysis/config/metadata.conf b/reproduce/analysis/config/metadata.conf index caac5c9..f570340 100644 --- a/reproduce/analysis/config/metadata.conf +++ b/reproduce/analysis/config/metadata.conf @@ -15,7 +15,7 @@ # and the copyright license name and standard link to the fully copyright # license. # -# Copyright (C) 2020-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2020-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice and diff --git a/reproduce/analysis/config/pdf-build.conf b/reproduce/analysis/config/pdf-build.conf index 015bf2e..a57b529 100644 --- a/reproduce/analysis/config/pdf-build.conf +++ b/reproduce/analysis/config/pdf-build.conf @@ -12,7 +12,7 @@ # LaTeX.
Otherwise, a notice will just printed that, no PDF will be # created. # -# Copyright (C) 2018-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice and diff --git a/reproduce/analysis/config/verify-outputs.conf b/reproduce/analysis/config/verify-outputs.conf index d96f293..37fc43c 100644 --- a/reproduce/analysis/config/verify-outputs.conf +++ b/reproduce/analysis/config/verify-outputs.conf @@ -1,6 +1,6 @@ # To enable verification of output datasets set this variable to 'yes'. # -# Copyright (C) 2019-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2019-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice and diff --git a/reproduce/analysis/make/demo-plot.mk b/reproduce/analysis/make/demo-plot.mk index 53e1918..13b0d45 100644 --- a/reproduce/analysis/make/demo-plot.mk +++ b/reproduce/analysis/make/demo-plot.mk @@ -1,7 +1,7 @@ # Second step of analysis: # Data for plot of number/fraction of tools per year. # -# Copyright (C) 2020-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2020-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -22,16 +22,16 @@ a2mk20f1c = $(tex-publish-dir)/tools-per-year.txt $(a2mk20f1c): $(mk20tab3) | $(tex-publish-dir) - # Remove the (possibly) produced figure that is created from this - # table: it is created by LaTeX's TiKZ package, and includes - # multiple files with a fixed prefix. 
+# Remove the (possibly) produced figure that is created from this +# table: it is created by LaTeX's TiKZ package, and includes multiple +# files with a fixed prefix. rm -f $(tikzdir)/figure-tools-per-year* - # Write the column metadata in a temporary file name (appending - # '.tmp' to the actual target name). Once all steps are done, it is - # renamed to the final target. We do this because if there is an - # error in the middle, Make will not consider the job to be - # complete and will stop here. +# Write the column metadata in a temporary file name (appending +# '.tmp' to the actual target name). Once all steps are done, it is +# renamed to the final target. We do this because if there is an +# error in the middle, Make will not consider the job to be complete +# and will stop here. echo "# Data of plot showing fraction of papers that mentioned software tools" > $@.tmp echo "# per year to demonstrate the features of Maneage (MANaging data linEAGE)." >> $@.tmp >> $@.tmp @@ -48,7 +48,7 @@ $(a2mk20f1c): $(mk20tab3) | $(tex-publish-dir) $(call print-general-metadata, $@.tmp) - # Find the maximum number of papers. +# Find the maximum number of papers. awk '!/^#/{all[$$1]+=$$2; id[$$1]+=$$3} \ END{ for(year in all) \ printf("%-7d%-10.3f%d\n", year, 100*id[year]/all[year], \ @@ -56,7 +56,7 @@ $(a2mk20f1c): $(mk20tab3) | $(tex-publish-dir) }' $< \ >> $@.tmp - # Write it into the final target +# Write it into the final target mv $@.tmp $@ @@ -66,15 +66,15 @@ $(a2mk20f1c): $(mk20tab3) | $(tex-publish-dir) # Final LaTeX macro $(mtexdir)/demo-plot.tex: $(a2mk20f1c) $(pconfdir)/demo-year.conf - # Find the first year (first column of first row) of data. +# Find the first year (first column of first row) of data. v=$$(awk '!/^#/ && c==0{c++; print $$1}' $(a2mk20f1c)) echo "\newcommand{\menkefirstyear}{$$v}" > $@ - # Find the number of rows in the plotted table. +# Find the number of rows in the plotted table. 
v=$$(awk '!/^#/{c++} END{print c}' $(a2mk20f1c)) echo "\newcommand{\menkenumyears}{$$v}" >> $@ - # Find the number of papers in 1996. +# Find the number of papers in 1996. v=$$(awk '$$1==$(menke-demo-year){print $$3}' $(a2mk20f1c)) echo "\newcommand{\menkenumpapersdemocount}{$$v}" >> $@ echo "\newcommand{\menkenumpapersdemoyear}{$(menke-demo-year)}" >> $@ diff --git a/reproduce/analysis/make/download.mk b/reproduce/analysis/make/download.mk index ea70fca..7110c8f 100644 --- a/reproduce/analysis/make/download.mk +++ b/reproduce/analysis/make/download.mk @@ -5,7 +5,7 @@ # recipes in this Makefile all use a single file lock to have one download # script running at every instant. # -# Copyright (C) 2018-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -27,70 +27,68 @@ # Download input data # -------------------- # -# The input dataset properties are defined in -# `$(pconfdir)/INPUTS.conf'. For this template we only have one dataset to -# enable easy processing, so all the extra checks in this rule may seem -# redundant. +# 'reproduce/analysis/config/INPUTS.conf' contains the input dataset +# properties. In most cases, you will not need to edit this rule (or +# file!). Simply follow the instructions of 'INPUTS.conf' and set the +# variables names according to the described standards. # -# In a real project, you will need more than one dataset. In that case, -# just add them to the target list and add an `elif' statement to define it -# in the recipe. -# -# Files in a server usually have very long names, which are mainly designed -# for helping in data-base management and being generic. 
Since Make uses -# file names to identify which rule to execute, and the scope of this -# research project is much less than the generic survey/dataset, it is -# easier to have a simple/short name for the input dataset and work with -# that. In the first condition of the recipe below, we connect the short -# name with the raw database name of the dataset. +# TECHNICAL NOTE on the '$(foreach i, ...)' loop of 'inputdatasets': we are +# using several (relatively complex!) features particular to Make: In GNU +# Make, '.VARIABLES' "... expands to a list of the names of all global +# variables defined so far" (from the "Other Special Variables" section of +# the GNU Make manual). Assuming that the pattern 'INPUT-%-sha256' is only +# used for input files, we find all the variables that contain the input +# file name (the '%' is the filename). Finally, using the +# pattern-substitution function ('patsubst'), we remove the fixed string at +# the start and end of the variable name. # # Download lock file: Most systems have a single connection to the # internet, therefore downloading is inherently done in series. As a # result, when more than one dataset is necessary for download, if they are # done in parallel, the speed will be slower than downloading them in -# series. We thus use the `flock' program to tie/lock the downloading +# series. We thus use the 'flock' program to tie/lock the downloading # process with a file and make sure that only one downloading event is in # progress at every moment. $(indir):; mkdir $@ downloadwrapper = $(bashdir)/download-multi-try -inputdatasets = $(indir)/menke20.xlsx +inputdatasets = $(foreach i, \ + $(patsubst INPUT-%-sha256,%, \ + $(filter INPUT-%-sha256,$(.VARIABLES))), \ + $(indir)/$(i)) $(inputdatasets): $(indir)/%: | $(indir) $(lockdir) - # Set the necessary parameters for this input file.
- if [ $* = menke20.xlsx ]; then - localname=$(MK20DATA); url=$(MK20URL); mdf=$(MK20MD5); - else - echo; echo; echo "Not recognized input dataset: '$*'." - echo; echo; exit 1 - fi +# Set the necessary parameters for this input file as shell variables +# (to help in readability). + url=$(INPUT-$*-url) + sha=$(INPUT-$*-sha256) - # Download (or make the link to) the input dataset. If the file - # exists in `INDIR', it may be a symbolic link to some other place - # in the filesystem. To avoid too many links when using these files - # during processing, we'll use `readlink -f' so the link we make - # here points to the final file directly (note that `readlink' is - # part of GNU Coreutils). If its not a link, the `readlink' part - # has no effect. +# Download (or make the link to) the input dataset. If the file +# exists in 'INDIR', it may be a symbolic link to some other place in +# the filesystem. To avoid too many links when using these files +# during processing, we'll use 'readlink -f' so the link we make here +# points to the final file directly (note that 'readlink' is part of +# GNU Coreutils). If its not a link, the 'readlink' part has no +# effect. unchecked=$@.unchecked - if [ -f $(INDIR)/$$localname ]; then - ln -fs $$(readlink -f $(INDIR)/$$localname) $$unchecked + if [ -f $(INDIR)/$* ]; then + ln -fs $$(readlink -f $(INDIR)/$*) $$unchecked else touch $(lockdir)/download $(downloadwrapper) "wget --no-use-server-timestamps -O" \ $(lockdir)/download $$url $$unchecked fi - # Check the md5 sum to see if this is the proper dataset. - sum=$$(md5sum $$unchecked | awk '{print $$1}') - if [ $$sum = $$mdf ]; then +# Check the checksum to see if this is the proper dataset. + sum=$$(sha256sum $$unchecked | awk '{print $$1}') + if [ $$sum = $$sha ]; then mv $$unchecked $@ echo "Integrity confirmed, using $@ in this project." 
else echo; echo; - echo "Wrong MD5 checksum for input file '$$localname':" + echo "Wrong SHA256 checksum for input file '$*':" echo " File location: $$unchecked"; \ - echo " Expected MD5 checksum: $$mdf"; \ - echo " Calculated MD5 checksum: $$sum"; \ + echo " Expected SHA256 checksum: $$sha"; \ + echo " Calculated SHA256 checksum: $$sum"; \ echo; exit 1 fi @@ -104,7 +102,7 @@ $(inputdatasets): $(indir)/%: | $(indir) $(lockdir) # It is very important to mention the address where the data were # downloaded in the final report. $(mtexdir)/download.tex: $(indir)/menke20.xlsx | $(mtexdir) - echo "\newcommand{\menketwentyxlsxname}{$(MK20DATA)}" > $@ - echo "\newcommand{\menketwentychecksum}{$(MK20MD5)}" >> $@ - echo "\newcommand{\menketwentybytesize}{$(MK20SIZE)}" >> $@ - echo "\newcommand{\menketwentyurl}{$(MK20URL)}" >> $@ + echo "\newcommand{\menketwentyxlsxname}{menke20.xlsx}" > $@ + echo "\newcommand{\menketwentychecksum}{$(INPUT-menke20.xlsx-sha256)}" >> $@ + echo "\newcommand{\menketwentybytesize}{$(INPUT-menke20.xlsx-size)}" >> $@ + echo "\newcommand{\menketwentyurl}{$(INPUT-menke20.xlsx-url)}" >> $@ diff --git a/reproduce/analysis/make/format.mk b/reproduce/analysis/make/format.mk index fd4060a..979475f 100644 --- a/reproduce/analysis/make/format.mk +++ b/reproduce/analysis/make/format.mk @@ -6,7 +6,7 @@ # because it provides interesting statistics about tools and methods used # in scientific papers. # -# Copyright (C) 2020-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2020-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -28,13 +28,13 @@ mk20tab3 = $(a1dir)/table-3.txt $(a1dir):; mkdir $@ $(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) - # Set a base-name for the table-3 data. +# Set a base-name for the table-3 data. 
base=$(basename $(notdir $<))-table-3 - # Unfortunately XLSX I/O only works when the input and output are - # in the directory it is running. So first, we need to switch to - # the input directory, run it, then put our desired output where we - # want and delete the extra files. +# Unfortunately XLSX I/O only works when the input and output are in +# the directory it is running. So first, we need to switch to the +# input directory, run it, then put our desired output where we want +# and delete the extra files. topdir=$$(pwd) cd $(indir) xlsxio_xlsx2csv $(notdir $<) @@ -42,12 +42,12 @@ $(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) rm $(notdir $<).*.csv cd $$topdir - # Read the necessary information. Note that we are dealing with a - # CSV (comma-separated value) file. But when there are commas in a - # string, quotation signs are put around it. The `FPAT' values is - # fully described in the GNU AWK manual. In short, it ensures that - # if there is a comma in the middle of double-quotes, it doesn't - # count as a delimter. +# Read the necessary information. Note that we are dealing with a CSV +# (comma-separated value) file. But when there are commas in a +# string, quotation signs are put around it. The `FPAT' values is +# fully described in the GNU AWK manual. In short, it ensures that if +# there is a comma in the middle of double-quotes, it doesn't count +# as a delimter. echo "# Column 1: YEAR [counter, i16] Year of journal's publication." > $@.tmp echo "# Column 2: NUM_PAPERS [counter, i16] Number of studied papers in that journal." >> $@.tmp echo "# Column 3: NUM_PAPERS_WITH_TOOLS [counter, i16] Number of papers with an identified tool." >> $@.tmp @@ -56,9 +56,9 @@ $(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) awk 'NR>1{printf("%-10d%-10d%-10d%-10d %s\n", $$2, $$3, $$3*$$NF, $$(NF-1), $$1)}' \ FPAT='([^,]+)|("[^"]+")' $(indir)/$$base.csv >> $@.tmp - # Set the temporary file as the final target. 
This was done so if - # there is any possible crash in the steps above, this rule is - # re-run (its final target isn't rebuilt). +# Set the temporary file as the final target. This was done so if +# there is any possible crash in the steps above, this rule is re-run +# (its final target isn't rebuilt). mv $@.tmp $@ @@ -68,19 +68,19 @@ $(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) # Main LaTeX macro file $(mtexdir)/format.tex: $(mk20tab3) - # Count the total number of papers in their study. +# Count the total number of papers in their study. v=$$(awk '!/^#/{c+=$$2} END{print c}' $(mk20tab3)) echo "\newcommand{\menkenumpapers}{$$v}" > $@ - # Count how many unique journals there were in the study. Note that - # the `31' comes because we put 10 characters for each numeric - # column and separated the last numeric column from the string - # column with a space. If the number of numeric columns change in - # the future, the `31' also has to change. +# Count how many unique journals there were in the study. Note that +# the `41' comes because we put 10 characters for each numeric column +# and separated the last numeric column from the string column with a +# space. If the number of numeric columns change in the future, the +# `41' also has to change. v=$$(awk 'BEGIN{FIELDWIDTHS="41 10000"} !/^#/{print $$2}' \ $(mk20tab3) | uniq | wc -l) echo "\newcommand{\menkenumjournals}{$$v}" >> $@ - # Count how many rows the original catalog has. +# Count how many rows the original catalog has. v=$$(awk '!/^#/{c++} END{print c}' $(mk20tab3)) echo "\newcommand{\menkenumorigrows}{$$v}" >> $@ diff --git a/reproduce/analysis/make/initialize.mk b/reproduce/analysis/make/initialize.mk index bc73df8..7f0c514 100644 --- a/reproduce/analysis/make/initialize.mk +++ b/reproduce/analysis/make/initialize.mk @@ -1,6 +1,6 @@ # Project initialization.
# -# Copyright (C) 2018-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,10 +25,10 @@ # Basic directories that are used throughout the project. # # Locks are used to make sure that an operation is done in series not in -# parallel (even if Make is run in parallel with the `-j' option). The most +# parallel (even if Make is run in parallel with the '-j' option). The most # common case is downloads which are better done in series and not in # parallel. Also, some programs may not be thread-safe, therefore it will -# be necessary to put a lock on them. This project uses the `flock' program +# be necessary to put a lock on them. This project uses the 'flock' program # to achieve this. # # To help with modularity and clarity of the build directory (not mixing @@ -43,7 +43,7 @@ bsdir=$(BDIR)/software texdir = $(badir)/tex lockdir = $(bsdir)/locks indir = $(badir)/inputs -prepdir = $(padir)/prepare +prepdir = $(badir)/prepare mtexdir = $(texdir)/macros installdir = $(bsdir)/installed bashdir = reproduce/analysis/bash @@ -56,10 +56,10 @@ pconfdir = reproduce/analysis/config # Preparation phase # ----------------- # -# This Makefile is loaded both for the `prepare' phase and the `make' +# This Makefile is loaded both for the 'prepare' phase and the 'make' # phase. But the preparation files should be dealt with differently -# (depending on the phase). In the `prepare' phase, the main directory -# should be created, and in the `make' phase, its contents should be +# (depending on the phase). In the 'prepare' phase, the main directory +# should be created, and in the 'make' phase, its contents should be # loaded. # # If you don't need any preparation, please simply comment these lines. 
@@ -87,6 +87,14 @@ endif # (independent parts of the paper can be added to it independently). To fix # this problem, when we are in a group setting, we'll use the user's ID to # create a separate LaTeX build directory for each user. +# +# The same logic applies to the final paper PDF: each user will create a +# separte final PDF (for example 'paper-user1.pdf' and 'paper-user2.pdf') +# and no 'paper.pdf' will be built. This isn't a problem because +# 'initialize.tex' is a .PHONY prerequisite, so the rule to build the final +# paper is always executed (even if it is present and nothing has +# changed). So in terms of over-all efficiency and processing steps, this +# doesn't change anything. ifeq (x$(GROUP-NAME),x) texbtopdir = build else @@ -104,7 +112,7 @@ tikzdir = $(texbdir)/tikz # --------------------------- # # Before defining the local sub-environment here, we'll need to save the -# system's environment for some scenarios (for example after `clean'ing the +# system's environment for some scenarios (for example after 'clean'ing the # built programs). curdir := $(shell echo $$(pwd)) @@ -117,16 +125,16 @@ curdir := $(shell echo $$(pwd)) # # We want the full recipe to be executed in one call to the shell. Also we # want Make to run the specific version of Bash that we have installed -# during `./project configure' time. +# during './project configure' time. # # Regarding the directories, this project builds its major dependencies # itself and doesn't use the local system's default tools. With these # environment variables, we are setting it to prefer the software we have # build here. # -# `TEXINPUTS': we have to remove all possible user-specified directories to -# avoid conflicts with existing TeX Live solutions. Later (in `paper.mk'), -# we are also going to overwrite `TEXINPUTS' just before `pdflatex'. +# 'TEXINPUTS': we have to remove all possible user-specified directories to +# avoid conflicts with existing TeX Live solutions. 
Later (in 'paper.mk'), +# we are also going to overwrite 'TEXINPUTS' just before 'pdflatex'. .ONESHELL: .SHELLFLAGS = -ec export TERM=xterm @@ -144,12 +152,12 @@ export LD_LIBRARY_PATH := $(installdir)/lib # will be empty. export CPATH := $(SYS_CPATH) -# RPATH is automatically written in macOS, so `DYLD_LIBRARY_PATH' is +# RPATH is automatically written in macOS, so 'DYLD_LIBRARY_PATH' is # ultimately redundant. But on some systems, even having a single value # causes crashs (see bug #56682). So we'll just give it no value at all. export DYLD_LIBRARY_PATH := -# OpenMPI can depend on an existing `ssh' or `rsh' binary. However, because +# OpenMPI can depend on an existing 'ssh' or 'rsh' binary. However, because # of security reasons, its best to not install them, disable any # remote-shell accesss through this environment variable. export OMPI_MCA_plm_rsh_agent=/bin/false @@ -163,7 +171,7 @@ export BASH_ENV := $(shell pwd)/reproduce/software/shell/bashrc.sh # Python enviroment # ----------------- # -# The main Python environment variable is `PYTHONPATH'. However, so far we +# The main Python environment variable is 'PYTHONPATH'. However, so far we # have found several other Python-related environment variables on some # systems which might interfere. To be safe, we are removing all their # values. @@ -187,10 +195,10 @@ export MPI_PYTHON3_SITEARCH := # directories (or possible sub-directories) for individual steps will be # defined and added within their own Makefiles. # -# The `.SUFFIXES' rule with no prerequisite is defined to eliminate all the +# The '.SUFFIXES' rule with no prerequisite is defined to eliminate all the # default implicit rules. The default implicit rules are to do with -# programming (for example converting `.c' files to `.o' files). The -# problem they cause is when you want to debug the make command with `-d' +# programming (for example converting '.c' files to '.o' files). 
The +# problem they cause is when you want to debug the make command with '-d' # option: they add too many extra checks that make it hard to find what you # are looking for in the outputs. .SUFFIXES: @@ -201,8 +209,11 @@ $(lockdir): | $(bsdir); mkdir $@ # Version and distribution tarball definitions -project-commit-hash := $(shell if [ -d .git ]; then \ - echo $$(git describe --dirty --always --long); else echo NOGIT; fi) +project-commit-hash := $(shell \ + if [ -d .git ]; then \ + export LD_LIBRARY_PATH="$(installdir)/lib"; \ + echo $$($(installdir)/bin/git describe --dirty --always --long); \ + else echo NOGIT; fi) project-package-name := maneaged-$(project-commit-hash) project-package-contents = $(texdir)/$(project-package-name) @@ -213,10 +224,10 @@ project-package-contents = $(texdir)/$(project-package-name) # High-level Makefile management # ------------------------------ # -# About `.PHONY': these are targets that must be built even if a file with +# About '.PHONY': these are targets that must be built even if a file with # their name exists. # -# Only `$(mtexdir)/initialize.tex' corresponds to a file. This is because +# Only '$(mtexdir)/initialize.tex' corresponds to a file. This is because # we want to ensure that the file is always built in every run: it contains # the project version which may change between two separate runs, even when # no file actually differs. @@ -229,14 +240,20 @@ texclean: mkdir $(texdir)/build/tikz # 'tikz' is assumed to already exist. clean: - # Delete the top-level PDF file. +# Delete the top-level PDF file. rm -f *.pdf - # Delete all the built outputs except the dependency - # programs. We'll use Bash's extended options builtin (`shopt') to - # enable "extended glob" (for listing of files). It allows extended - # features like ignoring the listing of a file with `!()' that we - # are using afterwards. +# Delete possible LaTeX output in top directory. 
This can happen when +# the user has run LaTeX with applications other than maneage. For +# example, when opening 'paper.tex' file with 'texstudio' and +# executing 'build'. + rm -f *.aux *.log *.synctex *.auxlock *.dvi *.out *.run.xml *.bcf + +# Delete all the built outputs except the dependency programs. We'll +# use Bash's extended options builtin ('shopt') to enable "extended +# glob" (for listing of files). It allows extended features like +# ignoring the listing of a file with '!()' that we are using +# afterwards. shopt -s extglob rm -rf $(texdir)/macros/!(dependencies.tex|dependencies-bib.tex|hardware-parameters.tex) rm -rf $(badir)/!(tex) $(texdir)/!(macros|$(texbtopdir)) @@ -244,14 +261,13 @@ clean: rm -rf $(bsdir)/preparation-done.mk distclean: clean - # Without cleaning the Git hooks, we won't be able to easily - # commit or checkout after this task is done. So we'll remove them - # first. +# Without cleaning the Git hooks, we won't be able to easily commit +# or checkout after this task is done. So we'll remove them first. rm -f .git/hooks/post-checkout .git/hooks/pre-commit - # We'll be deleting the built environent programs and just need the - # `rm' program. So for this recipe, we'll use the host system's - # `rm', not our own. +# We'll be deleting the built environent programs and just need the +# 'rm' program. So for this recipe, we'll use the host system's 'rm', +# not our own. $$sys_rm -rf $(BDIR) $$sys_rm -f .local .build $(pconfdir)/LOCAL.conf @@ -268,15 +284,15 @@ distclean: clean # without having to worry about the technicalities of the analysis. $(project-package-contents): paper.pdf | $(texdir) - # Set up the output directory, delete it if it exists and remake it - # to fill with new contents. +# Set up the output directory, delete it if it exists and remake it +# to fill with new contents. dir=$@ rm -rf $$dir mkdir $$dir curdir=$$(pwd) - # Build a small Makefile to help in automatizing the paper building - # (including the bibliography). 
+# Build a small Makefile to help in automatizing the paper building +# (including the bibliography). m=$$dir/Makefile echo "paper.pdf: paper.tex paper.bbl" > $$m printf "\tlatex -shell-escape -halt-on-error paper\n" >> $$m @@ -291,94 +307,92 @@ $(project-package-contents): paper.pdf | $(texdir) printf "\trm -f *.aux *.auxlock *.bbl *.bcf\n" >> $$m printf "\trm -f *.blg *.log *.out *.run.xml\n" >> $$m - # Copy the top-level contents (see next step for `paper.tex'). +# Copy the top-level contents (see next step for 'paper.tex'). cp COPYING project README.md README-hacking.md $$dir/ - # Since the packaging is mainly intended for high-level building of - # the PDF with LaTeX, we'll comment the `makepdf' LaTeX macro in - # the paper. This will disable usage of TiKZ. +# Since the packaging is mainly intended for high-level building of +# the PDF with LaTeX, we'll comment the 'makepdf' LaTeX macro in the +# paper. This will disable usage of TiKZ. sed -e's|\\newcommand{\\makepdf}{}|%\\newcommand{\\makepdf}{}|' \ paper.tex > $$dir/paper.tex - # Copy ONLY the version-controlled files in 'reproduce' and - # 'tex/src'. This is important because files like 'LOCAL.conf' (in - # 'reproduce/software/config') should not be archived, they contain - # information about the host computer and are irrelevant for - # others. Also some project authors may have temporary files here - # that are not under version control and thus shouldn't be archived - # (although this is bad practice, but that is up to the user). - # - # To keep the sub-directory structure, we are packaging the files - # with Tar, piping it, and unpacking it in the archive - # directory. So afterwards we need to come back to the current - # directory. +# Copy ONLY the version-controlled files in 'reproduce' and +# 'tex/src'. This is important because files like 'LOCAL.conf' (in +# 'reproduce/software/config') should not be archived, they contain +# information about the host computer and are irrelevant for +# others. 
Also some project authors may have temporary files here +# that are not under version control and thus shouldn't be archived +# (although this is bad practice, but that is up to the user). +# +# To keep the sub-directory structure, we are packaging the files +# with Tar, piping it, and unpacking it in the archive directory. So +# afterwards we need to come back to the current directory. tar -c -f - $$(git ls-files peer-review reproduce tex/src) \ | (cd $$dir ; tar -x -f -) cd $(curdir) - # Build the other two subdirectories of 'tex/' that we need in the - # archive (in the actual project, these are symbolic links to the - # build directory). +# Build the other two subdirectories of 'tex/' that we need in the +# archive (in the actual project, these are symbolic links to the +# build directory). mkdir $$dir/tex/tikz $$dir/tex/build - # Copy the 'tex/build' directory into the archive (excluding the - # temporary archive directory that we are now copying to). We will - # be using Bash's extended globbing ('extglob') for excluding this - # directory. +# Copy the 'tex/build' directory into the archive (excluding the +# temporary archive directory that we are now copying to). We will be +# using Bash's extended globbing ('extglob') for excluding this +# directory. shopt -s extglob cp -r tex/img $$dir/tex/img cp tex/tikz/*.eps $$dir/tex/tikz cp -r tex/build/!($(project-package-name)) $$dir/tex/build - # Clean up the $(texdir)/build* directories in the archive (when - # building in a group structure, there will be `build-user1', - # `build-user2' and etc). These are just temporary LaTeX build - # files and don't have any relevant/hand-written files in them. +# Clean up the $(texdir)/build* directories in the archive (when +# building in a group structure, there will be 'build-user1', +# 'build-user2' and etc). These are just temporary LaTeX build files +# and don't have any relevant/hand-written files in them. 
rm -rf $$dir/tex/build/build* - # If the project has any PDFs in its 'tex/tikz' directory (TiKZ or - # PGFPlots was used to generate them), copy them too. +# If the project has any PDFs in its 'tex/tikz' directory (TiKZ or +# PGFPlots was used to generate them), copy them too. if ls tex/tikz/*.pdf &> /dev/null; then cp tex/tikz/*.pdf $$dir/tex/tikz fi - # When submitting to places like arXiv, they will just run LaTeX - # once and won't run `biber'. So we need to also keep the `.bbl' - # file into the distributing tarball. However, BibLaTeX is - # particularly sensitive to versioning (a `.bbl' file has to be - # read by the same BibLaTeX version that created it). This is hard - # to do with non-up-to-date places like arXiv. Therefore, we thus - # just copy the whole of BibLaTeX's source (the version we are - # using) into the top tarball directory. In this way, arXiv's LaTeX - # engine will use the same BibLaTeX version to interpret the `.bbl' - # file. TIP: you can use the same strategy for other LaTeX packages - # that may cause problems on the arXiv server. +# When submitting to places like arXiv, they will just run LaTeX once +# and won't run 'biber'. So we need to also keep the '.bbl' file into +# the distributing tarball. However, BibLaTeX is particularly +# sensitive to versioning (a '.bbl' file has to be read by the same +# BibLaTeX version that created it). This is hard to do with +# non-up-to-date places like arXiv. Therefore, we thus just copy the +# whole of BibLaTeX's source (the version we are using) into the top +# tarball directory. In this way, arXiv's LaTeX engine will use the +# same BibLaTeX version to interpret the '.bbl' file. TIP: you can +# use the same strategy for other LaTeX packages that may cause +# problems on the arXiv server. 
cp tex/build/build/paper.bbl $$dir/ tltopdir=.local/texlive/maneage/texmf-dist/tex/latex #find $$tltopdir/biblatex/ -maxdepth 1 -type f -print0 \ # | xargs -0 cp -t $$dir - # Just in case the package users want to rebuild some of the - # figures (manually un-comment the `makepdf' command we commented - # above), correct the TikZ external directory, so the figures can - # be rebuilt. +# Just in case the package users want to rebuild some of the figures +# (manually un-comment the 'makepdf' command we commented above), +# correct the TikZ external directory, so the figures can be rebuilt. pgfsettings="$$dir/tex/src/preamble-pgfplots.tex" sed -e's|{tikz/}|{tex/tikz/}|' $$pgfsettings > $$pgfsettings.new mv $$pgfsettings.new $$pgfsettings - # PROJECT SPECIFIC - # ---------------- - # Put any project-specific distribution steps here. +# PROJECT SPECIFIC +# ---------------- +# Put any project-specific distribution steps here. cd $$curdir cp tex/build/build/appendix.bbl $$dir/ - # ---------------- +# ---------------- - # Clean temporary files that may have been created by text editors. +# Clean temporary files that may have been created by text editors. cd $(texdir) find $(project-package-name) -name \*~ -delete find $(project-package-name) -name \*.swp -delete -# Package into `.tar.gz' or '.tar.lz'. +# Package into '.tar.gz' or '.tar.lz'. dist dist-lzip: $(project-package-contents) curdir=$$(pwd) cd $(texdir) @@ -394,7 +408,7 @@ dist dist-lzip: $(project-package-contents) cd $$curdir mv $(texdir)/$(project-package-name).tar.$$suffix ./ -# Package into `.zip'. +# Package into '.zip'. dist-zip: $(project-package-contents) curdir=$$(pwd) cd $(texdir) @@ -495,13 +509,13 @@ print-general-metadata = \ # This file will store some basic info about the project that is necessary # for the final PDF. Since these are not version controlled, it must be # calculated everytime the project is run. So even though this file -# actually exists, it is also aded as a `.PHONY' target above. 
+# actually exists, it is also aded as a '.PHONY' target above. $(mtexdir)/initialize.tex: | $(mtexdir) - # Version and title of project. About the starting '@': since these - # commands are run every time with './project make', it is annoying - # to print them on the standard output every time. With the '@', - # make will not print the commands that it runs in this recipe. +# Version and title of project. About the starting '@': since these +# commands are run every time with './project make', it is annoying +# to print them on the standard output every time. With the '@', make +# will not print the commands that it runs in this recipe. @d=$$(git show -s --format=%aD HEAD | awk '{print $$2, $$3, $$4}') echo "\newcommand{\projectdate}{$$d}" > $@ echo "\newcommand{\projecttitle}{$(metadata-title)}" >> $@ @@ -514,16 +528,15 @@ $(mtexdir)/initialize.tex: | $(mtexdir) v=$$(echo $(metadata-doi-zenodo) | sed -e's/\./ /g' | awk '{print $$NF}') echo "\newcommand{\projectzenodoid}{$$v}" >> $@ - # Calculate the latest Maneage commit used to build this - # project: - # - The project may not have the 'maneage' branch (for example - # after cloning from a fork that didn't include it!). In this - # case, we'll print a descriptive warning, telling the user what - # should be done (reporting the last merged commit and its date - # is very useful for the future). - # - The '--dirty' option (used in 'project-commit-hash') isn't - # applicable to "commit-ishes" (direct quote from Git's error - # message!). +# Calculate the latest Maneage commit used to build this project: +# - The project may not have the 'maneage' branch (for example +# after cloning from a fork that didn't include it!). In this +# case, we'll print a descriptive warning, telling the user what +# should be done (reporting the last merged commit and its date +# is very useful for the future). 
+# - The '--dirty' option (used in 'project-commit-hash') isn't +# applicable to "commit-ishes" (direct quote from Git's error +# message!). if git log maneage -1 &> /dev/null; then c=$$(git merge-base HEAD maneage) v=$$(git describe --always --long $$c) diff --git a/reproduce/analysis/make/paper.mk b/reproduce/analysis/make/paper.mk index 00bd3b5..da2702c 100644 --- a/reproduce/analysis/make/paper.mk +++ b/reproduce/analysis/make/paper.mk @@ -1,6 +1,6 @@ # Build the final PDF paper/report. # -# Copyright (C) 2018-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,14 +24,14 @@ # # To report the input settings and results, the final report's PDF (final # target of this project) uses macros generated from various steps of the -# project. All these macros are defined through `$(mtexdir)/project.tex'. +# project. All these macros are defined through '$(mtexdir)/project.tex'. # -# `$(mtexdir)/project.tex' is actually just a combination of separate files +# '$(mtexdir)/project.tex' is actually just a combination of separate files # that keep the LaTeX macros related to each workhorse Makefile (in -# `reproduce/src/make/*.mk'). Those individual macros are pre-requisites to -# `$(mtexdir)/verify.tex' which will check them before starting to build +# 'reproduce/src/make/*.mk'). Those individual macros are pre-requisites to +# '$(mtexdir)/verify.tex' which will check them before starting to build # the paper. The only workhorse Makefile that doesn't need to produce LaTeX -# macros is this Makefile (`reproduce/src/make/paper.mk'). +# macros is this Makefile ('reproduce/src/make/paper.mk'). 
# # This file is thus the interface between the analysis/processing steps and # the final PDF: when we get to this point, all the processing has been @@ -39,38 +39,38 @@ # # Note that if you don't want the final PDF and just want the processing # and file outputs, you can give any value other than 'yes' to -# 'pdf-build-final' in `reproduce/analysis/config/pdf-build.conf'. +# 'pdf-build-final' in 'reproduce/analysis/config/pdf-build.conf'. $(mtexdir)/project.tex: $(mtexdir)/verify.tex - # If no PDF is requested, or if LaTeX isn't available, don't - # continue to building the final PDF. Otherwise, merge all the TeX - # macros into one for building the PDF. +# If no PDF is requested, or if LaTeX isn't available, don't continue +# to building the final PDF. Otherwise, merge all the TeX macros into +# one for building the PDF. @if [ -f .local/bin/latex ] && [ x"$(pdf-build-final)" = xyes ]; then - # Put a LaTeX input command for all the necessary macro files. - # 'hardware-parameters.tex' is created in 'configure.sh'. +# Put a LaTeX input command for all the necessary macro files. +# 'hardware-parameters.tex' is created in 'configure.sh'. projecttex=$(mtexdir)/project.tex rm -f $$projecttex for t in $(subst paper,,$(makesrc)) hardware-parameters; do echo "\input{tex/build/macros/$$t.tex}" >> $$projecttex done - # Possibly print the appendix in the final PDF. +# Possibly print the appendix in the final PDF. if [ x"$(separatesupplement)" = x1 ]; then echo "\newcommand{\separatesupplement}{}" >> $$projecttex fi - # Possibly highlight the '\new' parts of the text. +# Possibly highlight the '\new' parts of the text. if [ x"$(highlightnew)" = x1 ]; then echo "\newcommand{\highlightnew}{}" >> $$projecttex fi - # Possibly show the text within '\tonote'. +# Possibly show the text within '\tonote'. if [ x"$(highlightnotes)" = x1 ]; then echo "\newcommand{\highlightnotes}{}" >> $$projecttex fi - # The paper shouldn't be built. +# The paper shouldn't be built. 
else echo echo "-----" @@ -101,76 +101,76 @@ $(mtexdir)/project.tex: $(mtexdir)/verify.tex # The bibliography # ---------------- # -# We need to run the `bibtex' program on the output of LaTeX to generate +# We need to run the 'bibtex' program on the output of LaTeX to generate # the necessary bibliography before making the final paper. So we'll first -# have one run of LaTeX (similar to the `paper.pdf' recipe), then `biber'. +# have one run of LaTeX (similar to the 'paper.pdf' recipe), then 'bibtex'. # -# NOTE: `$(mtexdir)/project.tex' is an order-only-prerequisite for -# `paper.bbl'. This is because we need to run LaTeX in both the `paper.bbl' -# recipe and the `paper.pdf' recipe. But if `tex/src/references.bib' hasn't +# NOTE: '$(mtexdir)/project.tex' is an order-only-prerequisite for +# 'paper.bbl'. This is because we need to run LaTeX in both the 'paper.bbl' +# recipe and the 'paper.pdf' recipe. But if 'tex/src/references.tex' hasn't # been modified, we don't want to re-build the bibliography, only the final # PDF. bbls = $(foreach t,$(subst .pdf,,$(top-pdfs)),$(texbdir)/$(t).bbl) $(bbls): $(texbdir)/%.bbl: tex/src/references.tex \ $(mtexdir)/dependencies-bib.tex | $(mtexdir)/project.tex - # If `$(mtexdir)/project.tex' is empty, don't build PDF. +# If '$(mtexdir)/project.tex' is empty, don't build PDF. @macros=$$(cat $(mtexdir)/project.tex) if [ x"$$macros" != x ]; then - # Unfortunately I can't get bibtex to look into a special - # directory for the references, so we'll copy it into the LaTeX - # building directory. +# Unfortunately I can't get bibtex to look into a special directory +# for the references, so we'll copy it into the LaTeX building +# directory. p=$$(pwd) if ! [ -L $(texbdir)/references.bib ]; then ln -sf $$p/tex/src/references.tex $(texbdir)/references.bib fi - # Copy the improved IEEE bst file into the build directory. 
- # The improved bst file provides ArXiv clickable URLs and - # if available, open-access URLs based on the DOIs, with - # closed-access URLs as a fallback, via https://oadoi.org . +# Copy the improved IEEE bst file into the build directory. The +# improved bst file provides ArXiv clickable URLs and if available, +# open-access URLs based on the DOIs, with closed-access URLs as a +# fallback, via https://oadoi.org . ln -sf $$p/tex/src/IEEEtran_openaccess.bst $(texbdir)/ - # We'll run LaTeX first to generate the `.bcf' file (necessary - # for `biber') and then run `biber' to generate the `.bbl' file. +# We'll run LaTeX first to generate the '.bcf' file (necessary for +# 'bibtex') and then run 'bibtex' to generate the '.bbl' file. export TEXINPUTS=$$p: cd $(texbdir); - # Delete any possibly existing target (a '.bbl' file) to avoid - # complications with LaTeX being run before the command that - # generates it. Otherwise users will have to manually delete - # it. It will be built anyway once this rule is done. +# Delete any possibly existing target (a '.bbl' file) to avoid +# complications with LaTeX being run before the command that +# generates it. Otherwise users will have to manually delete it. It +# will be built anyway once this rule is done. rm -f $@ - # Put a link to the main LaTeX source that we want to build. +# Put a link to the main LaTeX source that we want to build. if [ $* = paper ]; then sdir="$$p" else sdir="$$p"/tex/src fi ln -sf "$$sdir"/$*.tex ./ - # The pdflatex option '-shell-escape' is "normally disallowed for - # security reasons" according to the `info pdflatex' manual, but - # is enabled here in order to allow the use of PGFPlots. If you - # do not use PGFPlots, then you can remove the `-shell-escape' - # option for better security. See - # https://savannah.nongnu.org/task/?15694 for details. 
+# The pdflatex option '-shell-escape' is "normally disallowed for +# security reasons" according to the 'info pdflatex' manual, but is +# enabled here in order to allow the use of PGFPlots. If you do not +# use PGFPlots, then you should remove the '-shell-escape' option +# for better security. See https://savannah.nongnu.org/task/?15694 +# for details. latex -shell-escape -halt-on-error $*.tex - # When we are building the main paper and the appendices are to - # be built within the main paper's PDF, we need two - # bibliographies: one for the main body, and one for the - # appendix. For this, we use 'multibib'. Multibib creates a - # separate '.aux' file for each bibliography. +# When we are building the main paper and the appendices are to be +# built within the main paper's PDF, we need two bibliographies: +# one for the main body, and one for the appendix. For this, we use +# 'multibib'. Multibib creates a separate '.aux' file for each +# bibliography. bibtex $* if [ x"$(separatesupplement)" != x1 ]; then bibtex appendix fi - # Hack: tidy up eprint+doi style that didn't work in .bst file. - # TODO (better): read Part 4 of - # http://mirrors.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf - # and fix the .bst style properly. +# Hack: tidy up eprint+doi style that didn't work in .bst file. +# TODO (better): read Part 4 of +# http://mirrors.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf and +# fix the .bst style properly. cp -pv $*.bbl $*-tmp.bbl \ && sed -e "s/\'/EOLINE/g" $*-tmp.bbl \ | tr -d '\n' \ @@ -188,7 +188,7 @@ $(bbls): $(texbdir)/%.bbl: tex/src/references.tex \ | sed -e 's/EOLINE/\n/g' > appendix.bbl fi - # Paper-specific hacks for reducing very-long author lists. +# Paper-specific hacks for reducing very-long author lists. 
cp -pv $*.bbl $*-tmp.bbl \ && sed -e "s/\'/EOLINE/g" $*-tmp.bbl \ | tr -d '\n' \ @@ -196,7 +196,7 @@ $(bbls): $(texbdir)/%.bbl: tex/src/references.tex \ | sed -e 's;, V\..Khodiyar[^{]*Whyte; et al.\\/;' \ | sed -e 's/EOLINE/\n/g' > $*.bbl - # The pre-final run of LaTeX after 'paper.bbl' was created. +# The pre-final run of LaTeX after 'paper.bbl' was created. latex -shell-escape -halt-on-error $*.tex fi @@ -207,36 +207,36 @@ $(bbls): $(texbdir)/%.bbl: tex/src/references.tex \ # The final paper # --------------- # -# Run LaTeX in the `$(texbdir)' directory so all the intermediate and +# Run LaTeX in the '$(texbdir)' directory so all the intermediate and # auxiliary files stay there and keep the top directory clean. To be able # to run everything cleanly from there, it is necessary to add the current -# directory (top project directory) to the `TEXINPUTS' environment +# directory (top project directory) to the 'TEXINPUTS' environment # variable. $(top-pdfs): %.pdf: $(mtexdir)/project.tex paper.tex \ tex/src/appendix-*.tex $(texbdir)/%.bbl - # If `$(mtexdir)/project.tex' is empty, don't build the PDF. +# If '$(mtexdir)/project.tex' is empty, don't build the PDF. @macros=$$(cat $(mtexdir)/project.tex) if [ x"$$macros" != x ]; then - # Go into the top TeX build directory and make the paper. +# Go into the top TeX build directory and make the paper. p=$$(pwd) export TEXINPUTS=$$p: cd $(texbdir) - # See above for a warning and brief discussion on the the - # pdflatex option `-shell-escape'. +# See above for a warning and brief discussion on the the pdflatex +# option '-shell-escape'. latex -shell-escape -halt-on-error $*.tex - # Convert the DVI to PostScript, and the PostScript to PDF. The - # `-dNOSAFER' option to GhostScript allows transparencies in the - # conversion from PostScript to PDF, see - # https://www.ghostscript.com/doc/current/Language.htm#Transparency +# Convert the DVI to PostScript, and the PostScript to PDF. 
The +# '-dNOSAFER' option to GhostScript allows transparencies in the +# conversion from PostScript to PDF, see +# https://www.ghostscript.com/doc/current/Language.htm#Transparency dvips $*.dvi ps2pdf $*.ps - # Come back to the top project directory and copy the built PDF - # file here. +# Come back to the top project directory and copy the built PDF +# file here. cd "$$p" cp $(texbdir)/$*.pdf $@ fi diff --git a/reproduce/analysis/make/prepare.mk b/reproduce/analysis/make/prepare.mk index d0b61d9..ecb6842 100644 --- a/reproduce/analysis/make/prepare.mk +++ b/reproduce/analysis/make/prepare.mk @@ -1,6 +1,6 @@ -# Basic preparations, called by `./project prepare'. +# Basic preparations, called by './project prepare'. # -# Copyright (C) 2019-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2019-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,30 +21,29 @@ # Final-target # -# Without this file, `./project make' won't work. +# Without this file, './project make' won't work. prepare-dep = $(subst prepare, ,$(makesrc)) $(bsdir)/preparation-done.mk: \ $(foreach s, $(prepare-dep), $(mtexdir)/$(s).tex) - # If you need to add preparations define targets above to do the - # preparations, then set the value below to `yes'. Recall that just - # like `./project make', before loading this file, `./project - # prepare' loads loads `initialize.mk' and `download.mk', so you - # can safely assume everything that is defined there in the - # preparation phase also. - # - # TIP: the targets can actually be automatically generated - # Makefiles that are used by `./project make'. They can include - # variables, or automatically generated rules. Just make sure that - # those Makefiles aren't written in the source directory. Even - # though they are Makefiles, they are automatically built, so they - # don't belong in the source. 
`$(prepdir)' has been defined for - # this purpose (see `initialize.mk'), we recommend that you put all - # automatically generated Makefiles under this directory. In the - # `make' phase, `initialize.mk' will automatically load all the - # `*.mk' files. If you need to load your generated - # configuration-makefiles before automatically generated Makefiles - # containing rules, you can use some naming convension like - # `conf-*.mk' and `rule-*.mk', or you can put them in - # subdirectories. +# If you need to add preparations define targets above to do the +# preparations, then set the value below to 'yes'. Recall that just +# like './project make', before loading this file, './project +# prepare' loads loads 'initialize.mk' and 'download.mk', so you can +# safely assume everything that is defined there in the preparation +# phase also. +# +# TIP: the targets can actually be automatically generated Makefiles +# that are used by './project make'. They can include variables, or +# automatically generated rules. Just make sure that those Makefiles +# aren't written in the source directory. Even though they are +# Makefiles, they are automatically built, so they don't belong in +# the source. '$(prepdir)' has been defined for this purpose (see +# 'initialize.mk'), we recommend that you put all automatically +# generated Makefiles under this directory. In the 'make' phase, +# 'initialize.mk' will automatically load all the '*.mk' files. If +# you need to load your generated configuration-makefiles before +# automatically generated Makefiles containing rules, you can use +# some naming convension like 'conf-*.mk' and 'rule-*.mk', or you can +# put them in subdirectories. @echo "include-prepare-results = no" > $@ diff --git a/reproduce/analysis/make/top-make.mk b/reproduce/analysis/make/top-make.mk index 27c1b5b..7755174 100644 --- a/reproduce/analysis/make/top-make.mk +++ b/reproduce/analysis/make/top-make.mk @@ -1,6 +1,6 @@ # Top-level Makefile (first to be loaded). 
# -# Copyright (C) 2018-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2018-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ # Load the local configuration (created after running -# `./project configure'). +# './project configure'). include reproduce/software/config/LOCAL.conf @@ -30,7 +30,7 @@ include reproduce/software/config/LOCAL.conf # Ultimate target of this project # ------------------------------- # -# The final paper/report (`paper.pdf') is the main target of this +# The final paper/report ('paper.pdf') is the main target of this # project. As defined in the Make paradigm, it must be the first target # that Make encounters (immediately after loading the local configuration # settings, necessary for a group building scenario mentioned next). @@ -50,8 +50,8 @@ include reproduce/software/config/LOCAL.conf # # Controlling this requires two variables that are available at this stage: # -# - `GROUP-NAME': from `LOCAL.conf' (which was built by `./project configure'). -# - `maneage_group_name': value to the `--group' option. +# - 'GROUP-NAME': from 'LOCAL.conf' (which was built by './project configure'). +# - 'maneage_group_name': value to the '--group' option. # # The analysis is only done when both have the same group name. Note that # when the project isn't being built for a group, both variables will be an @@ -63,7 +63,7 @@ include reproduce/software/config/LOCAL.conf # # If you are just interested in the processing and don't want to build the # PDF, you can skip the creation of the final PDF by giving a value of -# `yes' to `pdf-build-final' in `reproduce/analysis/config/pdf-build.conf'. +# 'yes' to 'pdf-build-final' in 'reproduce/analysis/config/pdf-build.conf'. 
ifeq ($(separatesupplement),0) top-pdfs = paper.pdf else @@ -92,13 +92,13 @@ endif # To keep things clean, manageable and readable, each set of operations # is (and must be) classified (modularized) by context into separate # Makefiles: the more the better. These modular steps are then -# included in this top-level Makefile through the `include' command of +# included in this top-level Makefile through the 'include' command of # the next step. Each Makefile should also produce a LaTeX macro file # with the same fixed name (used to keep all the parameters and # relevant outputs of the steps in it for the final paper). # # In the rare case that no special LaTeX macros are necessary in a -# workhorse Makefile, you can simply make an empty file with `touch +# workhorse Makefile, you can simply make an empty file with 'touch # $@'. This will not add any lines to the final combined LaTeX macros # file, but will create the file that is a prerequisite to the final # paper generation. @@ -112,7 +112,7 @@ endif # IMPORTANT NOTE: order matters in the inclusion of the processing # Makefiles. As the project grows, some Makefiles will define # variables/dependencies that later Makefiles need. Therefore we are using -# a `foreach' loop in the next step to explicitly request loading them in +# a 'foreach' loop in the next step to explicitly request loading them in # the same order that they are defined here (we aren't just using a # wild-card like the configuration Makefiles). makesrc = initialize \ @@ -136,7 +136,7 @@ makesrc = initialize \ # contain rules to actually do this project's processing. # # But before that, we need to identify the phase for the Makefiles that are -# run both in `./project prepare' and `./project make'. +# run both in './project prepare' and './project make'.
project-phase = make include reproduce/analysis/config/*.conf include $(foreach s,$(makesrc), reproduce/analysis/make/$(s).mk) diff --git a/reproduce/analysis/make/top-prepare.mk b/reproduce/analysis/make/top-prepare.mk index fb5700e..3950bf1 100644 --- a/reproduce/analysis/make/top-prepare.mk +++ b/reproduce/analysis/make/top-prepare.mk @@ -1,10 +1,10 @@ # Do basic preparations to optimize the project's running. # -# NOTE: This file is very similar to `top-make.mk', so the large comments +# NOTE: This file is very similar to 'top-make.mk', so the large comments # are not included here. Please see that file for thorough comments on each # step. # -# Copyright (C) 2019-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2019-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ # Load the local configuration (created after running -# `./project configure'). +# './project configure'). include reproduce/software/config/LOCAL.conf @@ -34,7 +34,7 @@ include reproduce/software/config/LOCAL.conf # Ultimate target of this project # ------------------------------- # -# See `top-make.mk' for complete explanation. +# See 'top-make.mk' for complete explanation. ifeq (x$(maneage_group_name),x$(GROUP-NAME)) all: $(BDIR)/software/preparation-done.mk @echo "Project preparation is complete."; @@ -57,12 +57,12 @@ endif # Define source Makefiles # ----------------------- # -# See `top-make.mk' for complete explanation. +# See 'top-make.mk' for complete explanation. # -# To ensure that `prepare' and `make' have the same basic definitions and -# environment and that all `downloads' are managed in one place, both -# `./project prepare' and `./project make' will first read `initialize.mk' -# and `downloads.mk'. 
+# To ensure that 'prepare' and 'make' have the same basic definitions and +# environment and that all 'downloads' are managed in one place, both +# './project prepare' and './project make' will first read 'initialize.mk' +# and 'download.mk'. makesrc = initialize \ download \ prepare @@ -74,7 +74,7 @@ makesrc = initialize \ # Include all analysis Makefiles # ------------------------------ # -# See `top-make.mk' for complete explanation. +# See 'top-make.mk' for complete explanation. project-phase = prepare include reproduce/analysis/config/*.conf include $(foreach s,$(makesrc), reproduce/analysis/make/$(s).mk) diff --git a/reproduce/analysis/make/verify.mk b/reproduce/analysis/make/verify.mk index 6503172..ac91089 100644 --- a/reproduce/analysis/make/verify.mk +++ b/reproduce/analysis/make/verify.mk @@ -1,6 +1,6 @@ # Verify the project outputs before building the paper. # -# Copyright (C) 2020-2021 Mohammad Akhlaghi <mohammad@akhlaghi.org> +# Copyright (C) 2020-2022 Mohammad Akhlaghi <mohammad@akhlaghi.org> # # This Makefile is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -94,7 +94,7 @@ verify-txt-no-comments-no-space = \ # (generated in various stages of the analysis. # # Since each analysis step's data files are already prerequisites of their -# respective TeX macro file, its enough for `verify.tex' to depend on the +# respective TeX macro file, it's enough for 'verify.tex' to depend on the # final TeX macro. # # USEFUL TIP: during the early phases of your research (when you are @@ -103,41 +103,41 @@ verify-txt-no-comments-no-space = \ # # Here is a description of the variables defined here. # -# verify-dep: The major step dependencies of `verify.tex', this includes +# verify-dep: The major step dependencies of 'verify.tex', this includes # all the steps that must be finished before it. # # verify-check: The files whose contents are important.
This is -# essentially the same as `verify-dep', but it has removed -# the `initialize' step (which is information about the +# essentially the same as 'verify-dep', but it has removed +# the 'initialize' step (which is information about the # pipeline, not the results). verify-dep = $(subst verify,,$(subst paper,,$(makesrc))) verify-check = $(subst initialize,,$(verify-dep)) $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) - # Make sure that verification is actually requested, the '@' at the - # start of the recipe is added so Make doesn't print the commands - # on the standard output because this recipe is run on every call - # to the project and can be annoying (get mixed in the middle of - # the analysis outputs or the LaTeX outputs). +# Make sure that verification is actually requested, the '@' at the +# start of the recipe is added so Make doesn't print the commands on +# the standard output because this recipe is run on every call to the +# project and can be annoying (get mixed in the middle of the +# analysis outputs or the LaTeX outputs). @if [ x"$(verify-outputs)" = xyes ]; then - # Make sure the temporary output doesn't exist (because we want - # to append to it). We are making a temporary output target so if - # there is a crash in the middle, Make will not continue. If we - # write in the final target progressively, the file will exist, - # and its date will be more recent than all prerequisites, so - # next time the project is run, Make will continue and ignore the - # rest of the checks. +# Make sure the temporary output doesn't exist (because we want to +# append to it). We are making a temporary output target so if +# there is a crash in the middle, Make will not continue. If we +# write in the final target progressively, the file will exist, and +# its date will be more recent than all prerequisites, so next time +# the project is run, Make will continue and ignore the rest of the +# checks. 
rm -f $@.tmp - # Verify the figure datasets. +# Verify the figure datasets. $(call verify-txt-no-comments-leading-space, \ $(a2mk20f1c), 76fc5b13495c4d8e8e6f8d440304cf69) - # Verify TeX macros (the values that go into the PDF text). +# Verify TeX macros (the values that go into the PDF text). for m in $(verify-check); do file=$(mtexdir)/$$m.tex - if [ $$m == download ]; then s=64da83ee3bfaa236849927cdc001f5d3 + if [ $$m == download ]; then s=5d0ab54ca95366d1aab12196966dd3b6 elif [ $$m == format ]; then s=e04d95a539b5540c940bf48994d8d45f elif [ $$m == demo-plot ]; then s=48bffe6cf8db790c63a33302d20db77f else echo; echo "'$$m' not recognized."; exit 1 @@ -145,7 +145,7 @@ $(mtexdir)/verify.tex: $(foreach s, $(verify-dep), $(mtexdir)/$(s).tex) $(call verify-txt-no-comments-no-space, $$file, $$s, $@.tmp) done - # Move temporary file to final target. +# Move temporary file to final target. mv $@.tmp $@ else echo "% Verification was DISABLED!" > $@ |