#!/bin/awk -f
#
# Copyright © 2022 Nick Bowler
#
# Partial implementation of POSIX "join" command: only the "-v" and "-a"
# options are implemented.
#
# Not all awk implementations support reading from standard input with the
# getline function by specifying a filename of "-". In particular, busybox
# awk will read from a file named "-" instead of standard input. Since
# busybox-based environments are typically missing "join", this limitation
# is problematic. As a workaround, do not use "-" for the second input
# filename.
#
# License WTFPL2: Do What The Fuck You Want To Public License, version 2.
# This is free software: you are free to do what the fuck you want to.
# There is NO WARRANTY, to the extent permitted by law.
#
# Overview of the implementation:
#
#   - The first file operand (LHS) is read by awk's normal record loop.
#   - The second file operand (RHS) is read one line at a time with getline
#     via advance_rhs(); the current RHS line is held split in rhs[] and its
#     join key (first field) in rhs_key.
#   - Join keys are always compared as strings.  Fields and split() results
#     are "numeric strings" in awk, so comparing them directly would compare
#     numerically whenever both look like numbers, which disagrees with the
#     string ordering of the (sorted) inputs.
#
# Global state:
#
#   show_uniq_lhs  print unpairable LHS lines (-a 1 / -v 1)
#   show_uniq_rhs  print unpairable RHS lines (-a 2 / -v 2)
#   show_common    print joined lines (cleared by -v)
#   file2          name of the RHS file
#   rhs, rhs_key   fields and key of the current RHS line
#   rhs_eof        nonzero once the RHS has hit end of file
#   rhs_nr         number of RHS lines read since the last (re)open
#   rhs_max_nr     highest rhs_nr ever reached; rhs_nr == rhs_max_nr means
#                  the current RHS line is not being revisited after a rewind
#   lhs_key        key of the current LHS line
#   lhs_prev       key of the last LHS line that was paired
#   failed         nonzero after a usage or RHS read error

BEGIN {
  show_uniq_lhs = 0
  show_uniq_rhs = 0
  show_common = 1
  failed = 0

  # Process command-line options
  for (i = 1; i < ARGC; i++) {
    # A lone "-" is an operand, not an option.
    if (substr(ARGV[i], 1, 1) != "-" || ARGV[i] == "-")
      break;

    opt = substr(ARGV[i], 2, 1);
    if (opt == "a" || opt == "v") {
      num = substr(ARGV[i], 3, 1);
      if (num == "") {
        # option argument must be next on command-line
        ARGV[i++] = "";
        num = ARGV[i];
      }

      if (opt == "v") {
        show_common = 0;
      }

      if (num == 1) {
        show_uniq_lhs = 1;
      } else if (num == 2) {
        show_uniq_rhs = 1;
      } else {
        # invalid argument
        fail();
      }
    } else {
      # unsupported option
      fail();
    }

    # Blank out consumed arguments so awk does not treat them as input files.
    ARGV[i] = "";
  }

  if (i+2 != ARGC) {
    # invalid usage: exactly two file operands are required
    fail();
  }

  # ARGV[i] stays in place and becomes awk's main input (the LHS); the
  # second operand is read manually with getline.
  file2 = ARGV[i+1];
  ARGV[i+1] = "";

  rhs_max_nr = rhs_nr = 0;
  if (advance_rhs() == 0)
    finish_rhs();
}

# Normalize the record (rebuild $0 with single OFS separators) and capture
# the join key as a string.
{
  $1 = $1;
  lhs_key = $1 "";
}

lhs_key == lhs_prev {
  # Rewind RHS as we have duplicate common keys in LHS.
  close(file2);
  rhs_nr = 0;
  advance_rhs();
}

# LHS key sorts before the current RHS key: the LHS line is unpairable.
lhs_key < rhs_key {
  if (show_uniq_lhs) {
    print;
  }
  next;
}

# LHS key sorts after the current RHS key: skip RHS lines until it does not.
{
  while (lhs_key > rhs_key) {
    # Only report RHS lines on the first pass over them, not after a rewind.
    if (show_uniq_rhs && rhs_nr == rhs_max_nr)
      print_rhs();
    if (advance_rhs() == 0)
      finish_rhs();
    if (show_uniq_lhs && lhs_key < rhs_key)
      print;
  }
}

# Keys match: emit one joined line per RHS line carrying the same key.
!rhs_eof && lhs_key == rhs_key {
  lhs_prev = lhs_key;
  do {
    if (show_common)
      print_match();
    advance_rhs();
  } while (!rhs_eof && lhs_key == rhs_key);
}

END {
  # "exit" in BEGIN (or in a function called from it) still runs END, so do
  # nothing here after a usage or read error; the exit status set by the
  # earlier exit statement is left untouched.
  if (!failed && show_uniq_rhs) {
    # Whatever remains of the RHS is unpairable.
    do {
      print_rhs();
    } while (advance_rhs() > 0);
  }
}

# fail()
#
# Record that the program is terminating because of an error, so that the
# END action produces no output, then exit with status 1.
function fail()
{
  failed = 1;
  exit 1;
}

# advance_rhs()
#
# Read the next line of the RHS file into rhs[] / rhs_key and update rhs_eof,
# rhs_nr and rhs_max_nr.  Returns the getline result: 1 if a line was read,
# 0 at end of file.  A read error terminates the program with status 1.
# (raw and rc are local variables, not parameters.)
function advance_rhs(raw, rc)
{
  rc = getline raw < file2;
  if (rc < 0)
    fail();
  rhs_eof = rc == 0;
  if (rhs_max_nr == rhs_nr++)
    rhs_max_nr++;
  # At end of file raw is empty, which leaves rhs[] empty and rhs_key "".
  split(raw, rhs);
  rhs_key = rhs[1] "";
  return rc;
}

# finish_rhs()
#
# Called once the RHS is exhausted: every remaining LHS line (including the
# current one, if any) is unpairable.  Prints them if requested and exits;
# the exit status is 1 if reading the LHS failed, 0 otherwise.
# (rc is a local variable, not a parameter.)
function finish_rhs(rc)
{
  rc = 0;
  if (show_uniq_lhs) {
    do {
      # NR is 0 when called from BEGIN: there is no current record yet.
      if (NR > 0) { $1 = $1; print; }
    } while ((rc = getline) > 0);
  }
  exit(-rc);
}

# print_rhs(i)
#
# Print the current RHS line, fields separated by single spaces.  Called
# with no argument the whole line is printed; called with i >= 1 the key
# field is omitted and each remaining field is preceded by a space, so the
# output can be appended to an already-printed LHS line.  Prints nothing
# once the RHS is at end of file.
function print_rhs(i)
{
  if (!rhs_eof) {
    if (i < 1)
      printf "%s", rhs[1];
    for (i = 2; i in rhs; i++) {
      printf " %s", rhs[i];
    }
    print "";
  }
}

# print_match()
#
# Print the joined line: the current LHS record followed by the non-key
# fields of the current RHS line.
function print_match(i)
{
  printf "%s", $0
  print_rhs(2);
}