From 31f4ea3faf6d357ae5889dc4aa9bd5b5457243ce Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Mon, 4 Jan 2021 01:52:25 +0000 Subject: Building of less software depends on ncurses Until now, the 'less' software package (used to view large files easily on the command-line and used by Git for things like 'git diff' or 'git log') only depended on 'patchelf' (which is a very low-level software). However, as Boud reported in bug #59811 [1], building less would crash with an error saying "Cannot find terminal libraries" in some systems (including the proposed Docker image of 'README.md' which I confirmed afterwards). Looking into the 'configure' script of 'less', I noticed that 'less' is actually just checking for some functions provided by the ncurses library! With this commit, 'less' depends on 'ncurses'. I was able to confirm that with this change, 'less' successfully builds within the Docker image. [1] https://savannah.nongnu.org/bugs/?59811 --- reproduce/software/make/basic.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reproduce/software/make/basic.mk b/reproduce/software/make/basic.mk index c4f0a16..4e69a32 100644 --- a/reproduce/software/make/basic.mk +++ b/reproduce/software/make/basic.mk @@ -974,7 +974,7 @@ $(ibidir)/gmp-$(gmp-version): \ # Less is useful with Git (to view the diffs within a minimal container) # and generally to view large files easily when the project is built in a # container with a minimal OS. 
-$(ibidir)/less-$(less-version): $(ibidir)/patchelf-$(patchelf-version) +$(ibidir)/less-$(less-version): $(ibidir)/ncurses-$(ncurses-version) tarball=less-$(less-version).tar.gz $(call import-source, $(less-url), $(less-checksum)) $(call gbuild, less-$(less-version), static,,-j$(numthreads)) -- cgit v1.2.1 From 02e53b972750e0a5d8200b910579784c06840e09 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Mon, 4 Jan 2021 02:58:05 +0000 Subject: README.md: summary Dockerfile with all necessary lines in one step Until now, the description in 'README.md' to build the Dockerfile in 'README.md' had one item per line, thoroughly describing the reason behind that line. But in many cases, the user is already familiar with Docker (or has already read through the items) and just wants to have the Dockerfile ready fast. In these cases, all those extra explanations are annoying. With this commit, an item '0' has been added at the start of the item list for summary. It only contains the necessary Dockerfile contents with no extra explanation. --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index 0e26ddb..98ba390 100644 --- a/README.md +++ b/README.md @@ -234,6 +234,32 @@ build the final PDF, please disable internet after the configuration phase. Note that only the necessary TeXLive packages are installed (~350 MB), not the full TeXLive collection! + 0. **Summary:** If you are already familiar with Docker, then the full + Dockerfile to get the project environment setup is shown here (without + any comments or explanations, because explanations are done in the next + items). Note that the last two `COPY` lines (to copy the directory + containing software tarballs used by the project and the possible input + databases) are optional because they will be downloaded if not + available. Once you build the Docker image, your project's environment + is setup and you can go into it to run `./project make` manually. 
+ + ```shell + FROM debian:stable-slim + RUN apt-get update && apt-get install -y gcc g++ wget + RUN useradd -ms /bin/sh maneager + USER maneager + WORKDIR /home/maneager + RUN mkdir build + RUN mkdir software + COPY --chown=maneager:maneager ./project-source /home/maneager/source + COPY --chown=maneager:maneager ./software-dir /home/maneager/software + COPY --chown=maneager:maneager ./data-dir /home/maneager/data + RUN cd /home/maneager/source \ + && ./project configure --build-dir=/home/maneager/build \ + --software-dir=/home/maneager/software \ + --input-dir=/home/maneager/data + ``` + 1. **Choose the base operating system:** The first step is to select the operating system that will be used in the docker image. Note that your choice of operating system also determines the commands of the next -- cgit v1.2.1 From dc4aa8cb8cebffaba0dae071e4e6c93f6c509b07 Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Mon, 4 Jan 2021 03:21:03 +0000 Subject: README-hacking.md: edits and improvements to publication checklist After going through the publication checklist, some edits were made to make things more clear. Also, an item was added to remind the project author that the commit hashes on the uploaded data files should be the same. --- README-hacking.md | 63 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/README-hacking.md b/README-hacking.md index 656a965..475f2ca 100644 --- a/README-hacking.md +++ b/README-hacking.md @@ -1010,14 +1010,17 @@ future. * *In plain-text*: If the data are in tabular form (for example the X and Y values in your plots), store them as a simple plain-text file - (for example with columns separated by white-space characters or in + (for example with columns separated by white-space characters) or in the more formal [Comma-separated - values](https://en.wikipedia.org/wiki/Comma-separated_values), or CSV, - format). 
If you have other types of data (for example images, or very - large tables with millions of rows/columns that can be inconvenient in - plain-text), feel free to use custom binary formats, but later, in the - description of your project on the server, tell people what software - they should use to open them. + values](https://en.wikipedia.org/wiki/Comma-separated_values), or CSV, + format. In the former case, it's best to set the suffixes to `.txt` + (because most browsers/OSs will automatically know they are plain-text + and open them without needing any other software). If you have other + types of data (for example images, or very large tables with millions + of rows/columns that can be inconvenient in plain-text), feel free to + use custom binary formats, but later, in the description of your + project on the server, add a note, explaining what software they + should use to open them. * *Descriptive names*: In some papers there are many files and having cryptic names will only confuse your readers (actually, yourself in @@ -1052,7 +1055,16 @@ future. is defined in `initialize.mk`. So you can use it anywhere in your project. - * *Copyright as metadata*: people need to know if they can use the + * *Same commit hashes*: each dataset may have been created at + different phases of your project's history. If you simply upload the + produced datasets, they may therefore have different commits on + them. To avoid confusing your readers (and yourself in the future), + it is best that they all have the same commit hash (which will also + be the commit hash printed in the paper). So upon publication, we + recommend deleting all of them and running `./project make` to build + them all with the same commit hash. + + * *Copyright as metadata*: people need to know if they can "use" the dataset (i.e., modify it), or possibly re-distribute it and their derived products. 
They also need to know how they can contact the creator of the datset (who is usually also the copyright owner). So @@ -1065,10 +1077,11 @@ future. the plots should be uploaded directly to Zenodo so they can be viewed/downloaded with a simple link in the caption. For example see the last sentence of the caption of Figure 1 in - [arXiv:2006.03018](https://arxiv.org/pdf/2006.03018.pdf), it points to - [the data](https://zenodo.org/record/3872248/files/tools-per-year.txt) - that was used to create that figure's top plot. As you see, this will - allow your paper's readers (again, most probably your future-self!) to + [arXiv:2006.03018v1](https://arxiv.org/pdf/2006.03018v1.pdf), it points + to [the + data](https://zenodo.org/record/3872248/files/tools-per-year.txt) that + was used to create that figure's top plot. As you see, this will allow + your paper's readers (again, most probably your future-self!) to directly access the numbers of each visualization (plot/figure) with a simple click in a trusted server. This also shows the major advantage of having your data as simple plain-text where possible, as described @@ -1104,20 +1117,24 @@ future. - **Fill `README.md`**: The `README.md` is *the first place* your readers are going to look into. It already has a default text with place-holders - in the form of `XXXXXX`. Please go through it and replace the - place-holders with the relevant information/links or feel free to - add/remove anything else. Just don't forget to tell your readers in - `README.md` that they can learn about this system in the - `README-hacking.md` file (ideally close to the top, like it is now). + in the form of `XXXXXX`. Please go through its first few paragraphs and + replace the place-holders with the relevant information/links or feel + free to add/remove anything else. The rest is just basic information + that is useful for any Maneage'd project. 
Just don't forget to tell your + readers in `README.md` that they can learn about this system in the + `README-hacking.md` file (ideally close to the top). - **Confirm if your project builds from scratch**: Before publishing anything, you should see if your project can indeed reproduce itself! - So, go to a temporary directory, clone your project from its repository - and try configuring and building it from scratch in a new-temporary - build-directory. It is important to ignore the directory you developed - your project on (source and build): you may have files there that you - forgot to import into Git or depended on in the build (it - happens!). Ideally, it would be good to try it on a different computer. + You may be mistakenly using temporarily created files that aren't built + when the project is built from scratch (this happens a lot and is very + dangerous for the integrity of your project!). So, go to a temporary + directory, clone your project from its repository and try configuring + and building it from scratch in a new-temporary build-directory. It is + important to ignore the original directory you developed your project on + (source and build): you may have files there that you forgot to import + into Git or depended on in the build (it happens!). Ideally, it would be + good to try it on a different computer. - **Confirm if `./project make dist` works**: The special target `dist` tells the project to build a tarball that is ready to compile the LaTeX -- cgit v1.2.1 From a1a966a598eb3693463aa5b0153f37ba22cfee6d Mon Sep 17 00:00:00 2001 From: Mohammad Akhlaghi Date: Mon, 4 Jan 2021 03:32:38 +0000 Subject: Building of Less program now uses patchelf to ensure good linking After correctly setting Less to depend on 'ncurses', I noticed it's still not linking to Maneage's 'ncurses', but pointing to my host system's 'ncurses' (that happens to have the same version! So it would crash on a system with a different version). 
This shows that like some other software, we need to manually correct the RPATH inside Less. With this commit, the necessary call to 'patchelf' has been added and with it, the installed 'less' command is properly linked to Maneage's internal build of 'ncurses'. --- reproduce/software/make/basic.mk | 3 +++ 1 file changed, 3 insertions(+) diff --git a/reproduce/software/make/basic.mk b/reproduce/software/make/basic.mk index 4e69a32..2a28e76 100644 --- a/reproduce/software/make/basic.mk +++ b/reproduce/software/make/basic.mk @@ -978,6 +978,9 @@ $(ibidir)/less-$(less-version): $(ibidir)/ncurses-$(ncurses-version) tarball=less-$(less-version).tar.gz $(call import-source, $(less-url), $(less-checksum)) $(call gbuild, less-$(less-version), static,,-j$(numthreads)) + if [ -f $(ibdir)/patchelf ]; then + $(ibdir)/patchelf --set-rpath $(ildir) $(ibdir)/less; + fi echo "Less $(less-version)" > $@ # On Mac OS, libtool does different things, so to avoid confusion, we'll -- cgit v1.2.1