################################################################################ # # SPAWK -- SQL Powered AWK # ################################################################################ # # Copyright 2017, by Panos I. Papadopoulos # All Rights Reserved # ################################################################################ # # SPAWK is an AWK library to be used for MySQL/MariaDB database access. # The library makes use of the "coprocessing" AWK ability; AWK coprocessing is # about two-way communication between AWK and children processes. # # SPAWK maintains a stack of database clients, where clients of each level # handle sibling SQL queries. Whenever a new SQL query is being submitted, a # new database client is pushed in the stack and executes the submitted query. # The client will be popped off the stack after all of the query result rows # have been returned to the caller; the caller must request the result rows from # the client for the client to be released and popped off the stack. If another # query is being submitted while the current client has not yet been released, # a new client will be pushed in the stack and so on. Clients popped off the # stack do not exit but stay active while the parent AWK process is still alive. # # SPAWK library defines a small number of global objects, namely functions and # variables, e.g. "spawk_query", "spawk_fetchrow", "spawk_dlm", "spawk_level", # etc. However, it's a fact that all of the SPAWK global object names begin with # "spawk_". # # In order to use the SPAWK library, either you can include the library in the # command line using -f AWK option, or you can include the library literally # using "@include" directive in the beginning of AWK scripts. Both of these two # methods may involve the AWKPATH environment variable. # ################################################################################ # # SPAWK API # ========= # # The user may communicate with the database via a small number of SPAWK library # functions. This subset of the SPAWK library functions form the so called # "SPAWK API". # # Warning! # Although, it's not a good practice to submit direct calls of non API SPAWK # functions, nor to access or modify internal SPAWK objects of any kind # (variables etc), sometimes it's inevitable to do so. # ################################################################################ # # SPAWK API Global Variables # ========================== # # SPAWK uses a small number of global variables. Some of them are intrinsic, # others are more conceivable. # # spawk_logfile # .............................................................................. # Specify a log file in the command line in order to log the submitted queries # and the data returned from the clients. # # Warning! # Log data will be appended to the specified logfile. If the file exists it will # be modified! # # spawk_dlm # .............................................................................. # The internal delimiter used to separate one query from the next. You may not # change the delimiter, or else SPAWK will fail. By default the delimiter is the # Control-A character (ascii 1). # # Warning! # Do not submit DELIMITER commands for SPAWK execution. # # spawk_eod # .............................................................................. # Means End-Of-Data. Used for signaling the caller that a client has no more # result rows available. By default the EOD character is Control-D (ascii 4). # # spawk_level # .............................................................................. # SPAWK maintains the current stack level; stack_level 0 means that there are no # available result rows to be returned, 1 means that the current client has more # result rows available, 2 means that the top two clients have more result rows # available and so on. # ################################################################################ # # SPAWK API Functions' Reference Manual # ===================================== # # spawk_sesami(user, password, database) # .............................................................................. # Call "spawk_sesami" once in the BEGIN section of the application SPAWK # program. The parameters will be used for database clients' authentication. # Sometimes it's unavoidable to call "spawk_sesami" more than once, e.g. one may # have to submit DDL queries as the database root user and then submit DML # queries as a normal database user. # # spawk_query(query) # .............................................................................. # To submit an DQL query (select), just call "spawk_query". After such a call, # the results must be retrieved by calling other API functions. Submitting other # queries while results from previously submitted queries have not yet been # retrieved, causes a new database client to be pushed on the stack. # # spawk_fetchrow(row, nosplit) # .............................................................................. # After query submition, "spawk_fetchrow" repetitive calls return one row at a # time. Each row is returned in "row[0]", while each column value is returned in # "row[1]", "row[2]", "row[3]" etc. Passing of "nosplit" true value, e.g. 1, # causes "spawk_fetchrow" not to split fetched row to columns. The only reason # why not to split the result row in columns is efficiency. # # Returns 0 if no rows selected, else returns the number of columns, or 1 when # in no-split mode. # # spawk_fetchrest() # .............................................................................. # Call of "spawk_fetchrest" function causes any unreaded rows to be read, thus # causing the current database client to be popped off the stack. Rows readed # are lost. # # spawk_fetchone(row, nosplit) # .............................................................................. # Fetch only the first result row and skip the rest of the query results. # Everything else is just like "spawk_fetchrow". # # spawk_submit(query, ret) # .............................................................................. # Submit an insert/update/delete or DDL query for execution. For non DDL queries # "ret[1]" is set to affected rows count, while "row[2]" is set to the last # inserted ID. # # Returns -1 on failure, affected rows count on success. # ################################################################################ function spawk_sesami(user, password, database, version, n, v) { spawk_reset() spawk_user = user if (password) spawk_password = password if (database) spawk_database = database if (!spawk_password) return # Beginning with version 4.2, gawk does update its own environment when # ENVIRON is changed, thus changing the environment seen by programs # that it creates. version = PROCINFO["version"] n = split(version, v, ".") # It's safer to put database user's password in the environment than # expose it in the database client's command line. if ((n >= 2) && ((v[1] + 0) >= 4) && ((v[2] + 0) >= 2)) ENVIRON["MYSQL_PWD"] = spawk_password # The password will be exposed because it will be part of the database # client's command line. else spawk_password_cli = " --password='" spawk_password "'" } # Function "spawk_query" accepts a select SQL query and submits that query to a # new client for execution. The new client may have started earlier from another # SPAWK call. Selected data must be retrieved using appropriate SPAWK functions, # e.g. "spawk_fetchrow", "spawk_fetchone" etc. function spawk_query(query) { spawk_logwrite("SQL/DQL: " query) print query spawk_dlm "SELECT '" spawk_eod "'" spawk_dlm |& spawk_push() } # Function "spawk_fetchrow" fetches the next row from the result set of rows # returned from a previously submitted query. Row is returned in the array # passed as first parameter to the function. Indexed 0 is the row as a whole, # while indexed 1 is the first column's value, indexed 2 is the second column's # value etc. # # The number of columns is returned to the caller, until the end of data is # reached; on end of data 0 is returned. # # A second parameter with non zero value may be passed in order not to split the # columns (for efficiency reasons). In such case only 0 indexed item is returned # in the array passed, and the returned value is always 1, except on end of # data. function spawk_fetchrow(row, nosplit, data, n) { delete row data = spawk_fetchdata() spawk_logwrite(data) if (data == spawk_eod) return 0 if (nosplit) { row[0] = data return 1 } n = split(data, row, "\t") row[0] = data # If "data" is an empty string split returns 0, though in this # case we must return 1. return n ? n : 1 } # Function "spawk_fetchrest" may be called to "eat" remaining results. function spawk_fetchrest( count) { for (count = 0; spawk_fetchdata() != spawk_eod;) count++; spawk_logwrite("\t[[ " count " rows skipped ]]") } # Function "spawk_fetchone" is like "spawk_fetchrow", but remaining rows of the # result set are sent to trash. function spawk_fetchone(row, nosplit, n) { if (n = spawk_fetchrow(row, nosplit)) spawk_fetchrest() return n } # Whenever an insert/update/delete or DDL query is to be submitted, # "spawk_submit" function must be used instead of "spawk_query". Affected rows # and last inserted ID are returned in an array passed as second parameter. # # On failure -1 is returned to the caller, otherwise the number of affected rows # is returned. function spawk_submit(query, ret, logfile) { spawk_logwrite("DML/DDL: " query) logfile = spawk_logfile spawk_logfile = "" spawk_query(query spawk_dlm "SELECT ROW_COUNT(), LAST_INSERT_ID()") spawk_logfile = logfile spawk_fetchone(ret) ret[1] += 0 ret[2] += 0 return ret[1] } function spawk_error(msg) { print "SPAWK: " msg >"/dev/stderr" } function spawk_fatal(msg, err) { spawk_error(msg) exit(err + 0) } ################################################################################ BEGIN { spawk_dlm = spawk_debug ? ";" : "\001" # command delimiter spawk_eod = spawk_debug ? "_EOD_" : "\004" # end of data spawk_level = 0 } function spawk_push( client) { if (spawk_level > 99) spawk_fatal("too many clients pushed") spawk_data[++spawk_level] = 1 client = spawk_client() if (client) return client spawk_logwrite("Creating SPAWK database client " spawk_level) client = "SPAWKLEVEL=" spawk_level " mysql" client = client " --no-defaults" client = client " --batch" client = client " --raw" client = client " --force" client = client " --silent" client = client " --no-beep" client = client " --skip-column-names" client = client " --unbuffered" client = client " --delimiter " spawk_dlm client = client " --user=" spawk_user client = client spawk_password_cli if (spawk_database) client = client " --database=" spawk_database spawk_stack[spawk_level] = client return client } function spawk_pop() { spawk_data[spawk_level--] = 0 } function spawk_client() { if (spawk_level <= 0) spawk_fatal("no active database client") return spawk_stack[spawk_level] } function spawk_reset( i) { for (i in spawk_stack) close(spawk_stack[i]) delete spawk_stack delete spawk_data } function spawk_fetchdata( ret, data) { if (!spawk_data[spawk_level]) spawk_fatal("no active client") ret = (spawk_client() |& getline data) if (ret == -1) spawk_fatal("I/O error") if (ret == 0) spawk_fatal("data drain") if (data == spawk_eod) spawk_pop() return data } function spawk_logwrite(x, i) { if (!spawk_logfile) return for (i = 0; i < spawk_level; i++) printf "\t" >>spawk_logfile print x >>spawk_logfile }