From 716b56b71b8513faa617acb38e2a841b59910b44 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Sun, 8 Mar 2020 18:15:32 +0000 Subject: Menke+20 example: properly count number of papers with software Until now, I was mistakenly multiplying the fraction of papers in that journal. This is corrected with this commit. --- paper.tex | 1 + reproduce/analysis/make/analysis-1.mk | 19 +++++++++++++++---- tex/src/figure-mk20tab3.tex | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/paper.tex b/paper.tex index 24c401e..f6aeed3 100644 --- a/paper.tex +++ b/paper.tex @@ -1126,6 +1126,7 @@ In \inlinecode{analysis1.mk} (Figure \ref{fig:mk20tab3}), we thus convert it to \input{tex/src/figure-mk20tab3.tex} \vspace{-3mm} \caption{\label{fig:mk20tab3}Simplified contents of \inlinecode{analysis1.mk}. + Here, we want to convert the downloaded XLSX dataset (Office Open XML Workbook format) to a simple plain-text fixed-width-per-column table. For the position of this subMakefile in the full project's data lineage, see Figure \ref{fig:datalineage}. In particular, here the arrows of that figure from \inlinecode{menke20.xlsx} to \inlinecode{menke20-table-3.txt} and from the latter to \inlinecode{analysis1.tex} are shown as the second and third Make rules. See Figure \ref{fig:download} and Appendix \ref{appendix:make} for more on the Make notation and Section \ref{sec:analysis} for describing the steps. diff --git a/reproduce/analysis/make/analysis-1.mk b/reproduce/analysis/make/analysis-1.mk index 9d0018e..f739306 100644 --- a/reproduce/analysis/make/analysis-1.mk +++ b/reproduce/analysis/make/analysis-1.mk @@ -47,9 +47,10 @@ $(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) # count as a delimiter. echo "# Column 1: YEAR [counter, i16] Year of journal's publication." > $@.tmp echo "# Column 2: NUM_PAPERS [counter, i16] Number of studied papers in that journal." >> $@.tmp - echo "# Column 3: NUM_ID_TOOLS [counter, i16] Number of software/tools that were identified." 
>> $@.tmp - echo "# Column 4: JOURNAL_NAME [string, str150] Name of journal." >> $@.tmp - awk 'NR>1{printf("%-10d%-10d%-10d %s\n", $$2, $$3, $$(NF-1)*$$NF, $$1)}' \ + echo "# Column 3: NUM_PAPERS_WITH_TOOLS [counter, i16] Number of papers with an identified tool." >> $@.tmp + echo "# Column 4: NUM_ID_TOOLS [counter, i16] Number of software/tools that were identified." >> $@.tmp + echo "# Column 5: JOURNAL_NAME [string, str150] Name of journal." >> $@.tmp + awk 'NR>1{printf("%-10d%-10d%-10d%-10d %s\n", $$2, $$3, $$3*$$NF, $$(NF-1), $$1)}' \ FPAT='([^,]+)|("[^"]+")' $(indir)/$$base.csv >> $@.tmp # Set the temporary file as the final target. This was done so if @@ -60,6 +61,16 @@ $(mk20tab3): $(indir)/menke20.xlsx | $(a1dir) +############################ +# Recreate Figure 1C of Menke+20. +############################ + +# awk '!/^#/{all[$1]+=$2; id[$1]+=$3} END{for(year in all){print year, id[year]/all[year]}}' menke20-table-3.txt.tmp +############################ + + + + # Main LaTeX macro file $(mtexdir)/analysis-1.tex: $(mk20tab3) | $(mtexdir) @@ -73,6 +84,6 @@ $(mtexdir)/analysis-1.tex: $(mk20tab3) | $(mtexdir) # column and separated the last numeric column from the string # column with a space. If the number of numeric columns change in # the future, the `41' also has to change. - v=$$(awk 'BEGIN{FIELDWIDTHS="31 10000"} !/^#/{print $$2}' \ + v=$$(awk 'BEGIN{FIELDWIDTHS="41 10000"} !/^#/{print $$2}' \ $(mk20tab3) | uniq | wc -l) echo "\newcommand{\menkenumjournals}{$$v}" >> $@ diff --git a/tex/src/figure-mk20tab3.tex b/tex/src/figure-mk20tab3.tex index 96468bb..3cc0cd3 100644 --- a/tex/src/figure-mk20tab3.tex +++ b/tex/src/figure-mk20tab3.tex @@ -1,6 +1,6 @@ \begin{tcolorbox} \footnotesize - \texttt{\mkcomment{1ST MAKE RULE: build the directory hosting the used table.}} + \texttt{\mkcomment{1ST MAKE RULE: build the directory hosting the converted table.}} \texttt{\mkvar{a1dir} = \$(\mkvar{BDIR})/analysis-1} -- cgit v1.2.1