#!/usr/bin/env python3
# ================================================
# Title			  : rqda_code_builder.py         #
# Description	  : RQDA Code Builder with YAML  #
# Author		  : Diarmuid O'Briain            # 
# Date			  : 20180930                     #
# Version		  : 1.4                          #
# Python Version  : 3.5.2                        #
# Usage			  : python3 rqda_code_builder.py #
# License         : www.gnu.org/licenses/gpl.txt #
# ================================================

'''
DEPENDENCIES

  $ sudo apt install python3
  $ sudo apt-get install python3-yaml

################################## NOTES #################################

RQDA() is a library for the R statistical computing and graphics language.
It allows for the bulk uploading of source files but not of the codes or
code categories. This is fine for deductive qualitative analysis as the
codes are developed as the papers are read; however, for inductive analysis
the process of uploading is very time-consuming through the GUI tool. The
database file created by RQDA() is an SQLite3 database, and this program
takes a YAML file as input and automatically builds the database entries
for categories and codes. Build the YAML file as follows:

codes.yaml                  example: codes.yaml
---                                       ---

Category_1:                               Organisation: 
 -  'Code_1_1'                             - Finance
 -  'Code_1_2'                             - Engineering

Category_2:                               Products:
 -  'Code_2_1'                             - Green
 -  'Code_2_2'                             - Brown
 -  'Code_2_3'                             - Red
 -  'Code_2_4'                             - Blue

Category_3:                               HR:
 -  'Code_3_1'                             - Pension
 -  'Code_3_2'                             - Salary
 -  'Code_3_3'                             - Health

In a directory place:
  - rqda_code_builder.py  : This program.
  - codes.yaml            : Codes file formatted as YAML shown above.
  - Transcript-texts      : Directory of transcript files.

Open a shell in this directory and run 'R', then from the 'R' shell
run the RQDA() library.

    MyShell~$ R

    R version 3.4.4 (2018-03-15) 

    > library (RQDA)

RQDA Graphic will appear.

Create a 'New Project'
----------------------

Select "New Project", say called 'RQDA_test' and place it in the same 
directory. A new file 'RQDA_test.rqda' appears. This file is an 
SQLite3 database.

Close the RQDA GUI and select 'OK' to the really EXIT? prompt.

Upload the 'Categories' and 'Codes'
-----------------------------------

Upload the 'Categories' and 'Codes' from the YAML file.

Run the 'rqda_code_builder.py' program.

    MyShell~$ ./rqda_code_builder.py -c myuser -d test.rqda -y codes.yaml
    RQDA Code Builder
    ----------------- 

    Connecting to the SQLite3 database test.rqda.
    Connected to the SQLite3 database test.rqda. Uploading..
    Upload completed
    ----------------

    A full list of SDL commands executed can be seen in the 'RQDA_SQL.log' file.

    You can restart the RQDA() library with the following command in the R shell:

    > RQDA()

In the RQDA GUI select 'Open Project' and pick the 'test.rqda' file.

Check the 'Code Categories' and 'Codes' tabs and you will see them populated.

Load Source files
-----------------

To bulk upload the source files execute this command from the R shell.

    > addFilesFromDir('./Transcript-texts', pattern = "*.txt$")

Check the RQDA GUI and you will notice the files under the 'Files' tab.

You are ready to start inductive coding with RQDA().

Coding by Search
----------------

This program will generate a file (RQDA_R_search_cmds.R) of 'R' commands.
These commands match each code with the code's ID. For example 'Blue' here
is linked to the code ID '1'. It will code all lines in all the source
files that end with a period (.), an exclamation mark (!) or a question
mark (?).

    codingBySearch('Blue',fid=getFileIds(),cid=1,seperator='[.!?]')
    codingBySearch('Brown',fid=getFileIds(),cid=2,seperator='[.!?]')
    codingBySearch('Engineering',fid=getFileIds(),cid=3,seperator='[.!?]')
    codingBySearch('Finance',fid=getFileIds(),cid=4,seperator='[.!?]')
    codingBySearch('Green',fid=getFileIds(),cid=5,seperator='[.!?]')
    ...
    ...

You may wish to add more, for example you may want lines containing the
word 'Grass' to be tagged with the code ID for 'Green'. The code ID for
'Green' is '5', so add the line:

    codingBySearch('Grass',fid=getFileIds(),cid=5,seperator='[.!?]')

and coding instances of 'Sky' with the code ID for 'Blue':

    codingBySearch('Sky',fid=getFileIds(),cid=1,seperator='[.!?]')

Execute these commands through the 'R' shell.

    > source('RQDA_R_search_cmds.R')

############################### End of notes #############################

'''

# // Imports //

import sys
import re
import sqlite3
from time import gmtime, strftime

# // Check if the 'pyyaml' package is installed //
# The import itself is the check: a missing package raises ImportError right
# here, so it has to be caught at the import rather than tested for later.

try:
    import yaml
except ImportError:
    print('ERROR: The python3 \'pyyaml\' is not installed.')
    print('\n       $ sudo pip3 install pyyaml\n')
    sys.exit(1)

# // Declarations //

DEBUG = False   # True / False
sql_log = 'RQDA_SQL.log'                 # Log of the SQL INSERTs executed
R_search_cmds = 'RQDA_R_search_cmds.R'   # Generated 'R' codingBySearch() list

# // Confirm the program is a Top-level environment //

if (__name__ == "__main__"):
    print('\nRQDA Code Builder')
    print('-' * 17, '\n')

# // Functions //

def rqda_code_builder_help(err_code):
    """Print the command-line usage text and terminate the program.

    Args:
        err_code: Exit status handed to ``sys.exit()``; the callers in this
            file pass 0 when help was requested and 1 on a usage error.

    Raises:
        SystemExit: Always, carrying ``err_code``.
    """

    # 'rqda_code_builder_help()' function

    print("""Usage: rqda_code_builder.py [-h|--help] -c|--coder [Name] -d|--database [DB] -y|--yaml [YAML]

    RQDA Code Builder
    -----------------

    OPTION

    -h, --help  -  This help message.

    ARGUMENTS

    -c|--coder [Name]   - Define coder, must match that from RQDA() settings.
    -d|--database [DB] -  Define path to SQLite3 database file.
    -y|--yaml [YAML]   -  Define path to YAML code file.
    """)
    sys.exit(err_code)

    # End of 'rqda_code_builder_help()' function

def rqda_code_builder_switches(switch_argv):
    """Extract the database, YAML and coder values from the command line.

    Args:
        switch_argv: Argument list in ``sys.argv`` form.

    Returns:
        A ``(db_file, yaml_file, codername)`` tuple of strings; a value whose
        switch was not supplied is returned as ``''``.

    Raises:
        SystemExit: Via ``rqda_code_builder_help()`` when help is requested,
            an unknown switch is given, or a switch has no value after it.
    """

    # 'rqda_code_builder_switches()' function

    help_switches = ('-h', '--help')

    # Each accepted value switch paired with the result slot it fills. The
    # long form is listed after the short form so that, when both are given,
    # the long form's value is the one kept.
    value_switches = (
        ('-d', 'db_file'), ('--database', 'db_file'),
        ('-y', 'yaml_file'), ('--yaml', 'yaml_file'),
        ('-c', 'codername'), ('--coder', 'codername'),
    )
    known = help_switches + tuple(switch for switch, _ in value_switches)
    values = {'db_file': '', 'yaml_file': '', 'codername': ''}

    # // Confirm there are no illegal switches //
    for argument in switch_argv:
        if argument.startswith('-') and argument not in known:
            print('ERROR: there is no switch {}.'.format(argument))
            rqda_code_builder_help(1)

    # // Help wins over everything else on the command line //
    if any(switch in switch_argv for switch in help_switches):
        rqda_code_builder_help(0)

    # // Pick up the value that follows each switch //
    for switch, slot in value_switches:
        if switch not in switch_argv:
            continue
        value_pos = switch_argv.index(switch) + 1
        # A switch at the very end, or one directly followed by another
        # switch, has no value of its own.
        if value_pos >= len(switch_argv) or switch_argv[value_pos] in known:
            print('ERROR: switch {} requires a value.'.format(switch))
            rqda_code_builder_help(1)
        values[slot] = switch_argv[value_pos]

    return (values['db_file'], values['yaml_file'], values['codername'])

    # End of 'rqda_code_builder_switches()' function

def _rcb_open_or_exit(path, mode, message):
    """Open 'path' in 'mode'; on failure print 'message' and exit with 1."""
    try:
        return open(path, mode)
    except (OSError, ValueError):
        print(message)
        sys.exit(1)


def _rcb_category_doc(yaml_docs):
    """Return the YAML document holding the category mapping, or None.

    The second document is preferred, as that is the one this program has
    always read; a file made of a single mapping document is accepted too.
    """
    if len(yaml_docs) > 1 and isinstance(yaml_docs[1], dict):
        return yaml_docs[1]
    for doc in yaml_docs:
        if isinstance(doc, dict):
            return doc
    return None


def _rcb_sql_text(value):
    """Quote 'value' as an SQL string literal for the log file."""
    return "'{}'".format(str(value).replace("'", "''"))


def _rcb_upload(cursor, categories, codername, db_date, logfile, searchfile):
    """Insert every category and code; log the SQL and write the R commands."""
    codecat_head = 'INSERT INTO codecat (name, catid, owner, date, status) VALUES '
    freecode_head = 'INSERT INTO freecode (name, owner, date, id, status) VALUES '
    treecode_head = 'INSERT INTO treecode (cid, catid, date, dateM, status, owner) VALUES '
    # NOTE(review): 'seperator' is presumably the spelling of the RQDA
    # argument itself - confirm against RQDA before "correcting" it.
    search_fmt = "codingBySearch('{}',fid=getFileIds(),cid={},seperator='[.!?]')\n"
    owner_sql = _rcb_sql_text(codername)
    date_sql = _rcb_sql_text(db_date)

    catcount = codecount = 0
    for category, codes in categories.items():
        catcount += 1
        # YAML may deliver non-string keys (numbers, booleans); store text.
        category = str(category)
        if (DEBUG):
            print('\n{}:'.format(category))

        # The log keeps the full statement as text, while the database gets
        # bound parameters so that quotes in a name cannot break the SQL.
        logfile.write('{}({}, {}, {}, {}, 1)\n'.format(
            codecat_head, _rcb_sql_text(category), catcount, owner_sql, date_sql))
        cursor.execute(codecat_head + '(?, ?, ?, ?, 1)',
                       (category, catcount, codername, db_date))

        # A category may be empty (None) or hold one bare code (a string).
        if isinstance(codes, str):
            codes = [codes]
        for code in codes or ():
            codecount += 1
            code = str(code)
            if (DEBUG):
                print('   - {}'.format(code))

            logfile.write('{}({}, {}, {}, {}, 1)\n'.format(
                freecode_head, _rcb_sql_text(code), owner_sql, date_sql, codecount))
            cursor.execute(freecode_head + '(?, ?, ?, ?, 1)',
                           (code, codername, db_date, codecount))

            logfile.write('{}({}, {}, {}, {}, 1, {})\n'.format(
                treecode_head, codecount, catcount, date_sql, date_sql, owner_sql))
            cursor.execute(treecode_head + '(?, ?, ?, ?, 1, ?)',
                           (codecount, catcount, db_date, db_date, codername))

            # // Generate 'R' searchfile //
            # Capitalised codes get a second, all lower-case search command.
            searchfile.write(search_fmt.format(code, codecount))
            if re.match('^[A-Z][a-z0-9]+', code):
                searchfile.write(search_fmt.format(code.lower(), codecount))


def rcb_code_handler(switch_values):
    """Load categories and codes from YAML into the RQDA SQLite3 database.

    The existing rows of the 'codecat', 'freecode' and 'treecode' tables are
    deleted and replaced by one 'codecat' row per category plus one
    'freecode' and one 'treecode' row per code. The delete and the inserts
    share one transaction, so a failure leaves the database as it was.
    Every INSERT is also written to the ``sql_log`` file, and a matching
    list of ``codingBySearch()`` commands to the ``R_search_cmds`` file.

    Args:
        switch_values: ``(db_file, yaml_file, codername)`` tuple as returned
            by ``rqda_code_builder_switches()``.

    Raises:
        SystemExit: With status 1 when a file cannot be opened, the YAML
            cannot be parsed or holds no category mapping, or the database
            cannot be opened or updated.
    """

    (db_file, yaml_file, codername) = switch_values

    # // Open configuration file, SQL log file and 'R' command list file //

    yamlfile = _rcb_open_or_exit(
        yaml_file, 'r', 'ERROR: No {} exists.'.format(yaml_file))
    logfile = _rcb_open_or_exit(
        sql_log, 'w', 'ERROR: Cannot open {} file.'.format(sql_log))
    searchfile = _rcb_open_or_exit(
        R_search_cmds, 'w', 'ERROR: Cannot open {} file.'.format(R_search_cmds))

    # The 'with' closes all three files on every exit path, sys.exit() included.
    with yamlfile, logfile, searchfile:

        # // Load yaml file details //

        if (DEBUG):
            print('DEBUG: Importing configuration from {}.'.format(yaml_file))

        # safe_load_all() builds plain Python data only, and unlike a bare
        # load_all() it does not need a Loader argument on current PyYAML.
        try:
            yaml_docs = list(yaml.safe_load_all(yamlfile))
        except yaml.YAMLError as err:
            print('ERROR: Cannot parse {}: {}'.format(yaml_file, err))
            sys.exit(1)

        categories = _rcb_category_doc(yaml_docs)
        if categories is None:
            print('ERROR: No categories found in {}.'.format(yaml_file))
            sys.exit(1)

        if (DEBUG):
            print('DEBUG: Imported categories and codes.')

        db_date = strftime("%a %b %d %H:%M:%S %Y", gmtime())

        # // Try connect to the SQLite3 database //

        print('Connecting to the SQLite3 database {}.'.format(db_file))

        try:
            conn = sqlite3.connect(db_file)
        except sqlite3.Error:
            print('ERROR: Couldn\'t connect to SQLite3 DB {}.'.format(db_file))
            sys.exit(1)

        try:
            c = conn.cursor()

            # // Delete existing entries from the database tables //
            # No commit here: the old rows only disappear for good once the
            # new ones have been inserted successfully.
            for table in ('codecat', 'freecode', 'treecode'):
                c.execute('DELETE FROM ' + table)

            # // Uploading new entries to the database tables //

            print('Connected to the SQLite3 database {}. Uploading..'.format(db_file))

            if (DEBUG):
                print('DEBUG: Categories and codes uploading from {} to {}.'.format(yaml_file, db_file))

            _rcb_upload(c, categories, codername, db_date, logfile, searchfile)

            if (DEBUG):
                print('\nDEBUG: Committing configuration to the SQLite3 database {}.'.format(db_file))

            # // Commit database changes //
            conn.commit()
        except sqlite3.Error as err:
            conn.rollback()
            print('ERROR: Update of SQLite3 DB {} failed: {}'.format(db_file, err))
            sys.exit(1)
        finally:
            # // Close database connection //
            conn.close()

    print('\nUpload completed\n----------------\n')
    print('A full list of SQL commands executed can be seen in the \'{}\' file.'.format(sql_log))
    print('\nYou can restart the RQDA() library with the following command in the R shell:\n')
    print('> RQDA()\n')

    # End of 'rcb_code_handler()' function



# // Run the command line only when executed as a program, so that //
# // importing this module neither parses sys.argv nor exits       //

if (__name__ == "__main__"):

    # // No arguments, or 8 and more: show the usage text and exit 1 //
    if (len(sys.argv) == 1 or len(sys.argv) >= 8):
        rqda_code_builder_help(1)

    # // Pass the arguments to the rqda_code_builder_switches() function //
    switch_values = rqda_code_builder_switches(sys.argv)

    # // Call the 'rcb_code_handler()' function //
    rcb_code_handler(switch_values)

    # // Exit program //
    sys.exit(0)

# END
