dnl variable (which is substituted by AC_SUBST) is set to the result and
dnl the cache variable dx_cv_join_works is set to "yes". Otherwise,
dnl dx_cv_join_works is set to "no" and JOIN is set to an incomplete
-dnl awk-based implementation which supports no options.
+dnl awk-based implementation.
+dnl
+dnl The awk replacement currently has the following limitations:
+dnl
+dnl - Only the -v and -a options are supported.
+dnl
+dnl - Due to limitations of some awk implementations, a filename of "-"
+dnl to mean standard input should be avoided for the second file name.
+dnl Nevertheless, using "-" for the first filename should be portable.
AC_DEFUN([DX_PROG_JOIN], [AC_PREREQ([2.62])dnl
AC_REQUIRE([AC_PROG_AWK])dnl
[JOIN="$AWK -f m4_do(
[m4_pushdef([m4_include], [$][1])],
[m4_include(DX_BASEDIR[/scripts/join.awk])],
- [m4_popdef([m4_include])])"])])
+ [m4_popdef([m4_include])]) --"])])
AC_DEFUN([_DX_JOIN_DO_TEST],
[AS_IF([test ! -f conftest.b],
#
# Copyright © 2022 Nick Bowler
#
-# Partial implementation of POSIX "join" command. No options are supported.
+# Partial implementation of POSIX "join" command: only the "-v" and "-a"
+# options are implemented.
+#
+# Not all awk implementations support reading from standard input with the
+# getline function by specifying a filename of "-". In particular, busybox
+# awk will read from a file named "-" instead of standard input. Since
+# busybox-based environments are typically missing "join", this limitation
+# is problematic. As a workaround, do not use "-" for the second input
+# filename.
#
# License WTFPL2: Do What The Fuck You Want To Public License, version 2.
# This is free software: you are free to do what the fuck you want to.
# There is NO WARRANTY, to the extent permitted by law.
BEGIN {
- if (ARGC != 3) {
+ # Output selection, following join's -a/-v options:
+ #   show_uniq_lhs - print unpairable lines from the first file
+ #   show_uniq_rhs - print unpairable lines from the second file
+ #   show_common   - print joined lines (turned off by -v)
+ show_uniq_lhs = 0
+ show_uniq_rhs = 0
+ show_common = 1
+
+ # Process command-line options
+ for (i = 1; i < ARGC; i++) {
+ # Stop at the first operand; a lone "-" is an operand, not an option.
+ if (substr(ARGV[i], 1, 1) != "-" || ARGV[i] == "-")
+ break;
+
+ opt = substr(ARGV[i], 2, 1);
+ if (opt == "a" || opt == "v") {
+ # The file number may be attached ("-v1") or separate ("-v 1").
+ num = substr(ARGV[i], 3, 1);
+ if (num == "") {
+ # option argument must be next on command-line
+ ARGV[i++] = "";
+ num = ARGV[i];
+ }
+
+ if (opt == "v") {
+ show_common = 0;
+ }
+
+ if (num == 1) {
+ show_uniq_lhs = 1;
+ } else if (num == 2) {
+ show_uniq_rhs = 1;
+ } else {
+ # invalid argument
+ exit 1;
+ }
+ } else {
+ # unsupported option
+ exit 1;
+ }
+
+ # Blank the consumed argument: awk skips empty ARGV elements
+ # instead of treating them as input files.
+ ARGV[i] = "";
+ }
+
+ # Exactly two operands (the two input files) must remain.
+ if (i+2 != ARGC) {
+ # invalid usage
exit 1;
}
- file2 = ARGV[2];
- delete ARGV[2];
+ # The second file is remembered in file2 and blanked in ARGV so that
+ # only the first file is read by awk's main input loop.
+ file2 = ARGV[i+1];
+ ARGV[i+1] = "";
- advance_rhs();
+ # rhs_nr counts records taken from file2 since the last rewind;
+ # rhs_max_nr is the highest value rhs_nr has reached.
+ rhs_max_nr = rhs_nr = 0;
+ # Prime the first RHS record; if file2 is empty there is nothing to
+ # join against, so finish_rhs() handles the remaining input and exits.
+ if (advance_rhs() == 0)
+ finish_rhs();
}
+# Main merge rules, run in order for every record of the first (LHS) file.
+#
+# Assigning to a field makes awk rebuild $0 from its fields joined by OFS,
+# so every LHS record is output with normalized field separators.
+{ $1 = $1 }
+# lhs_prev is only set by the matching rule at the bottom, so this fires
+# when the previous LHS record was joined and this one has the same key.
$1 == lhs_prev {
- # Rewind RHS as we have duplicate keys in LHS.
+ # Rewind RHS as we have duplicate common keys in LHS.
close(file2);
+
+ # Restart the position counter only; rhs_max_nr is left alone so the
+ # re-read records can be told apart from new ones below.
+ rhs_nr = 0;
advance_rhs();
}
-$1 < rhs[1] { next }
+# LHS key sorts before the current RHS key: this LHS record is unpairable.
+$1 < rhs[1] {
+ if (show_uniq_lhs) {
+ print;
+ }
+ next;
+}
+
+# Skip RHS records whose key sorts before the LHS key.
{
while ($1 > rhs[1]) {
+ # Report the skipped RHS record as unpairable only when it is the
+ # furthest record read so far (rhs_nr == rhs_max_nr), i.e. it is not
+ # being revisited after a rewind.
+ if (show_uniq_rhs && rhs_nr == rhs_max_nr)
+ print_rhs();
+
+ # RHS exhausted: finish_rhs() deals with the rest of the LHS and exits.
if (advance_rhs() == 0)
- exit(0);
+ finish_rhs();
+
+ # RHS moved past this key without matching it: LHS record is unpairable.
+ if (show_uniq_lhs && $1 < rhs[1])
+ print;
}
}
-$1 == rhs[1] {
- lhs_prev = $1 = $1;
+# Keys match: pair this LHS record with every consecutive RHS record
+# carrying the same key.  Nothing can match once rhs_eof is set.
+!rhs_eof && $1 == rhs[1] {
+ lhs_prev = $1;
do {
- print_match();
+ if (show_common) print_match();
advance_rhs();
- } while ($1 == rhs[1]);
+ } while (!rhs_eof && $1 == rhs[1]);
+}
+
+# When unpairable RHS lines are requested, output the current RHS record and
+# every record after it.  print_rhs() prints nothing once rhs_eof is set, so
+# this is a no-op when the second file has already been read to its end.
+END {
+ if (show_uniq_rhs) {
+ do {
+ print_rhs();
+ } while (advance_rhs() > 0);
+ }
}
function advance_rhs(raw, rc)
if (rc < 0)
exit(1);
+ rhs_eof = rc == 0;
+
+ if (rhs_max_nr == rhs_nr++)
+ rhs_max_nr++;
+
split(raw, rhs);
return rc;
}
+# Called once the second (RHS) file has no more records.  If unpairable LHS
+# lines are requested, prints the current LHS record (when one has been
+# read) and all remaining LHS input, then exits.  Never returns.
+#
+# rc is a local variable, not a parameter for callers to pass.
+function finish_rhs(rc)
+{
+ rc = 0;
+ if (show_uniq_lhs) {
+ do {
+ # NR is 0 when called from BEGIN, before any LHS record exists.
+ if (NR > 0) {
+ # Rebuild $0 so the output uses normalized field separators.
+ $1 = $1;
+ print;
+ }
+ } while ((rc = getline) > 0);
+ }
+ # getline yields 0 at end of input and -1 on error, so the exit
+ # status is 0 on success and 1 after a read error.
+ exit(-rc);
+}
+
+# Print the fields of the current RHS record followed by a newline.
+# Prints nothing at all once rhs_eof is set.
+#
+# i: when omitted (or less than 1) the join field rhs[1] is printed first;
+#    print_match() passes 2 so that only the remaining fields are printed,
+#    each preceded by a space.  i is then reused as the loop index.
+function print_rhs(i)
+{
+ if (!rhs_eof) {
+ if (i < 1)
+ printf "%s", rhs[1];
+ for (i = 2; i in rhs; i++) {
+ printf " %s", rhs[i];
+ }
+ print "";
+ }
+}
+
function print_match(i)
{
+ # Joined line: the whole LHS record, then the RHS fields after the
+ # join field; print_rhs(2) also supplies the terminating newline.
printf "%s", $0
- for (i = 2; i in rhs; i++) {
- printf " %s", rhs[i];
- }
- print ""
+ print_rhs(2);
}
-dnl Copyright © 2021 Nick Bowler
+dnl Copyright © 2021-2022 Nick Bowler
dnl
dnl License WTFPL2: Do What The Fuck You Want To Public License, version 2.
dnl This is free software: you are free to do what the fuck you want to.
AT_CHECK([$CC -o test0$EXEEXT test0.c && ./test0$EXEEXT], [0], [expout])
AT_CLEANUP
+
+dnl Run the awk replacement for join directly, whether or not the system
+dnl provides a working join.  The checks below cover plain joins and the
+dnl -v/-a options with the two inputs given in both orders.
+AT_SETUP([join.awk])
+
+dnl NOTE(review): the trailing "--" presumably ends awk's own option parsing
+dnl so that -a/-v reach the script through ARGV; confirm against AC_PROG_AWK
+dnl candidates.
+JOIN="$AWK -f $builddir/scripts/join.awk --"
+
+AT_DATA([a],
+[[1 a
+3 a1 x
+3 a2 x
+5 a
+6 a
+8 a1 x
+8 a2 x
+9 a1
+9 a2
+9 a3
+]])
+
+AT_DATA([b],
+[[2 b
+2 b
+3 b y
+4 b
+6 b1 y
+6 b2 y
+7 b
+8 b1 y
+8 b2 y
+]])
+
+AT_CHECK([$JOIN a b], [0],
+[[3 a1 x b y
+3 a2 x b y
+6 a b1 y
+6 a b2 y
+8 a1 x b1 y
+8 a1 x b2 y
+8 a2 x b1 y
+8 a2 x b2 y
+]])
+
+AT_CHECK([$JOIN -v1 a b], [0],
+[[1 a
+5 a
+9 a1
+9 a2
+9 a3
+]])
+
+AT_CHECK([$JOIN -v2 a b], [0],
+[[2 b
+2 b
+4 b
+7 b
+]])
+
+AT_CHECK([$JOIN -v1 -v2 a b], [0],
+[[1 a
+2 b
+2 b
+4 b
+5 a
+7 b
+9 a1
+9 a2
+9 a3
+]])
+
+AT_CHECK([$JOIN -a1 a b], [0],
+[[1 a
+3 a1 x b y
+3 a2 x b y
+5 a
+6 a b1 y
+6 a b2 y
+8 a1 x b1 y
+8 a1 x b2 y
+8 a2 x b1 y
+8 a2 x b2 y
+9 a1
+9 a2
+9 a3
+]])
+
+AT_CHECK([$JOIN -a2 a b], [0],
+[[2 b
+2 b
+3 a1 x b y
+3 a2 x b y
+4 b
+6 a b1 y
+6 a b2 y
+7 b
+8 a1 x b1 y
+8 a1 x b2 y
+8 a2 x b1 y
+8 a2 x b2 y
+]])
+
+AT_CHECK([$JOIN -a1 -a2 a b], [0],
+[[1 a
+2 b
+2 b
+3 a1 x b y
+3 a2 x b y
+4 b
+5 a
+6 a b1 y
+6 a b2 y
+7 b
+8 a1 x b1 y
+8 a1 x b2 y
+8 a2 x b1 y
+8 a2 x b2 y
+9 a1
+9 a2
+9 a3
+]])
+
+AT_CHECK([$JOIN b a], [0],
+[[3 b y a1 x
+3 b y a2 x
+6 b1 y a
+6 b2 y a
+8 b1 y a1 x
+8 b1 y a2 x
+8 b2 y a1 x
+8 b2 y a2 x
+]])
+
+AT_CHECK([$JOIN -v1 b a], [0],
+[[2 b
+2 b
+4 b
+7 b
+]])
+
+AT_CHECK([$JOIN -v2 b a], [0],
+[[1 a
+5 a
+9 a1
+9 a2
+9 a3
+]])
+
+AT_CHECK([$JOIN -v1 -v2 b a], [0],
+[[1 a
+2 b
+2 b
+4 b
+5 a
+7 b
+9 a1
+9 a2
+9 a3
+]])
+
+AT_CHECK([$JOIN -a1 b a], [0],
+[[2 b
+2 b
+3 b y a1 x
+3 b y a2 x
+4 b
+6 b1 y a
+6 b2 y a
+7 b
+8 b1 y a1 x
+8 b1 y a2 x
+8 b2 y a1 x
+8 b2 y a2 x
+]])
+
+AT_CHECK([$JOIN -a2 b a], [0],
+[[1 a
+3 b y a1 x
+3 b y a2 x
+5 a
+6 b1 y a
+6 b2 y a
+8 b1 y a1 x
+8 b1 y a2 x
+8 b2 y a1 x
+8 b2 y a2 x
+9 a1
+9 a2
+9 a3
+]])
+
+AT_CHECK([$JOIN -a1 -a2 b a], [0],
+[[1 a
+2 b
+2 b
+3 b y a1 x
+3 b y a2 x
+4 b
+5 a
+6 b1 y a
+6 b2 y a
+7 b
+8 b1 y a1 x
+8 b1 y a2 x
+8 b2 y a1 x
+8 b2 y a2 x
+9 a1
+9 a2
+9 a3
+]])
+
+AT_CHECK([echo wat | $JOIN -v1 - /dev/null], [0],
+[[wat
+]])
+
+AT_CLEANUP