package vcb.engine.tron;

import java.util.StringTokenizer;

/**
* This class parses an XML String into a Datatron. Only the very
* simplest aspects of XML are supported. Features are:
*
* <BLOCKQUOTE>
* 1. Duplicate tags are permitted directly within an element.
* <p>
* 2. Only basic tags, comments and "&lt;?" are supported. A basic
* tag is of the format
* &lt;TagName&gt;data or elements&lt;/TagName&gt;.
* Empty tags like &lt;TagName/&gt; are not supported.
* Attributes are not supported. Entities are not yet implemented.
* <p>
* 3. Cannot mix character data and elements within an element.
* After all, a node can only be a value (data) or a
* container (element with children).
* <p>
* 4. Empty elements are provided with a null value, not an empty
* String.
* <p>
* 5. All lines are trimmed and then one space is added at the end
* to separate possible words. If you want to delimit lines of
* character data, we suggest a semi-colon at the end.
* <p>
* 6. Nested comments are not supported.
* </BLOCKQUOTE>
*
* A very small parser such as this one has the advantage of
* allowing the engine to be used in an embedded environment. We
* expect to eventually use a third party parser as needs arise,
* the industry/technology settles down, and they become more
* appropriate for our needs.
* <p>
* Malformed input is reported with an IllegalArgumentException.
* <p>
* The conversions for a parse are held in an instance field that
* every public parse call overwrites, so one instance should not
* be used for two parses at the same time.
*
* @author Jack Harich
*/
public class Parser {

//---------- Public Fields ---------------------------------------
/**
* This is the document root tag that encloses the xml representing
* a Datatron.
*/
public static final String DEFAULT_ROOT_TAG = "dk";

//---------- Internal Fields -------------------------------------
/**
* Conversions used by the parse in progress, null if none. Set by
* the public parse methods and read by addDatatronToParent().
*/
protected ParserConversions conversions;

//---------- Public Methods --------------------------------------
/**
* Parses the XML text into a Datatron, which is returned.
* The DEFAULT_ROOT_TAG is used to return just the portion
* within that tag. No conversions are applied.
*
* @param text the XML text to parse
* @return whatever the parsed tree holds under DEFAULT_ROOT_TAG
* @throws IllegalArgumentException if the text is malformed
*/
public Datatron parseXMLtoDatatron(String text) {
    conversions = null;
    return parse(text);
}
/**
* Same as <code>parseXMLtoDatatron(String text)</code> except the
* conversions are used.
*
* @param text the XML text to parse
* @param conversions the conversions to apply, may be null for none
* @return whatever the parsed tree holds under DEFAULT_ROOT_TAG
* @throws IllegalArgumentException if the text is malformed
*/
public Datatron parseXMLtoDatatron(String text, ParserConversions conversions) {
    this.conversions = conversions;
    return parse(text);
}
//---------- Protected Methods -----------------------------------
/**
* Flattens the text to one line, strips the skipped elements,
* converts what is left into a temporary Datatron and returns the
* Datatron stored in it under DEFAULT_ROOT_TAG.
*
* @param text the XML text to parse
* @return the Datatron found under DEFAULT_ROOT_TAG
* @throws IllegalArgumentException if the text is malformed
*/
protected Datatron parse(String text) {
    // Join the lines into a single string. Each line is trimmed and
    // followed by one space so words on adjacent lines stay separated.
    StringBuffer buffer = new StringBuffer();
    StringTokenizer lines = new StringTokenizer(text, "\n");
    while (lines.hasMoreTokens()) {
        buffer.append(lines.nextToken().trim()).append(' ');
    }
    // The final trim matters: a whitespace-only first line would
    // otherwise leave a space in front of the root tag, and convert()
    // requires the text to begin with the start tag itself.
    String flat = removeSkipElements(buffer.toString()).trim();

    // Put converted text into a working Datatron
    Datatron tron = Datatron.create();
    convert(flat, tron);

    // Return Datatron within DEFAULT_ROOT_TAG. It is fetched before
    // the working Datatron is released, as the original code did.
    Datatron returnTron = tron.getDatatron(DEFAULT_ROOT_TAG);
    tron.release();
    return returnTron;
}
/**
* Removes all occurrences of elements to be skipped, which are
* those starting with &lt;? and &lt;!--. The element that starts
* first in the text is always removed first, so a comment that
* merely contains "&lt;?" is removed as one comment rather than
* being mistaken for the start of a "&lt;?" element.
*
* @param text the text to clean
* @return the text without skipped elements
* @throws IllegalArgumentException if a skipped element is never closed
*/
protected String removeSkipElements(String text) {
    while (true) {
        int instructionIndex = text.indexOf("<?");
        int commentIndex = text.indexOf("<!--");
        if (instructionIndex < 0 && commentIndex < 0) {
            return text;
        }
        boolean commentFirst = commentIndex >= 0
            && (instructionIndex < 0 || commentIndex < instructionIndex);
        if (commentFirst) {
            text = removeElement(text, "<!--", "-->");
        } else {
            text = removeElement(text, "<?", "?>");
        }
    }
}
/**
* Removes the first element in the text identified by start and
* end. The result is trimmed. If start does not occur the text is
* returned unchanged (and untrimmed).
*
* @param text the text to remove the element from
* @param start the characters that open the element
* @param end the characters that close the element
* @return the text without the element
* @throws IllegalArgumentException if start occurs but no end follows it
*/
protected String removeElement(String text, String start, String end) {
    int startIndex = text.indexOf(start);
    if (startIndex < 0) return text;

    // Look for the end only after the start marker
    int endIndex = text.indexOf(end, startIndex + start.length());
    if (endIndex < 0) throw new IllegalArgumentException
        ("Text has a element start '" + start +
        "' but no element end '" + end + "'.");

    // end may be at the very end of text, substring() then yields ""
    String prefix = text.substring(0, startIndex);
    String suffix = text.substring(endIndex + end.length());
    return (prefix + suffix).trim();
}
/**
* Converts the text by parsing and putting into the parentTron.
* The text must start with the start tag. Conversion stops when
* the matching end tag is reached. The unparsed text past the end
* tag is returned.
* <p>
* An element holding only character data is added to parentTron
* as a String, an empty element as a null String, and an element
* holding child elements as a Datatron.
*
* @param text the text, beginning with a start tag
* @param parentTron receives the converted element
* @return the trimmed text that follows the element's end tag
* @throws IllegalArgumentException if the text is malformed
*/
protected String convert(String text, Datatron parentTron) {

    // Get startKey, remove start tag, create endTag
    String startKey = readStartTagKey(text);
    String endTag = "</" + startKey + ">";
    text = removeIncluding(text, "<" + startKey + ">");

    // Handle "no value" case, the value is null and not ""
    if (text.startsWith(endTag)) {
        parentTron.addString(startKey, null);
        return removeIncluding(text, endTag);
    }
    // Created lazily, only when a child element is met
    Datatron myTron = null;
    while (! text.startsWith(endTag)) {

        // Text starts with "<" or is data
        if (text.startsWith("<")) {
            if (myTron == null) {
                myTron = Datatron.create();
            }
            // RECURSE, the child element goes into myTron
            text = convert(text, myTron);

        } else {
            // Data for the startKey
            String value = readTo(text, "<").trim();
            text = removeTo(text, "<");
            // Added to the parent under startKey, which is why data
            // and elements cannot be mixed within an element
            parentTron.addString(startKey, value);
        }
    } // End while
    addDatatronToParent(parentTron, myTron, startKey);

    // Remove endTag, done
    return removeIncluding(text, endTag);
}
/**
* Adds myTron to parentTron. The key is startKey unless the
* conversions name a list child for startKey and myTron holds a
* non-null String under that child key. In that case the String
* becomes the key and the child is removed from myTron.
*
* @param parentTron the Datatron to add to
* @param myTron the Datatron to add, nothing happens if null
* @param startKey the tag name of the element myTron was built from
*/
protected void addDatatronToParent(Datatron parentTron,
                                   Datatron myTron, String startKey) {
    if (myTron == null) return;

    String key = startKey;
    if (conversions != null) {
        // Null if empty or no conversion
        String childKey = conversions.getListChild(startKey);
        if (childKey != null) {
            String newStartKey = myTron.getString(childKey);
            if (newStartKey != null) {
                // Do conversion. Use child value for key, remove child.
                myTron.removeKey(childKey);
                key = newStartKey;
            }
        }
    }
    parentTron.addDatatron(key, myTron);
}
//---------- Helper Methods
// All text mutators do a trim()

/**
* Returns the name inside the start tag that the text begins with.
*
* @param text the text, which must begin with "&lt;" and contain "&gt;"
* @return the characters between the leading "&lt;" and the first "&gt;"
* @throws IllegalArgumentException if the text does not begin with a tag
*/
protected String readStartTagKey(String text) {
    int index = text.indexOf(">");
    if (index < 0 || ! text.startsWith("<")) {
        throw new IllegalArgumentException(
            "readStartTagKey() failure because text does not " +
            "start with a tag. " + text);
    }
    return text.substring(1, index);
}
/**
* Removes all in text before end. The end itself is kept, even
* when it is the last thing in the text. The result is trimmed.
*
* @param text the text to shorten
* @param end the characters to remove up to
* @return the trimmed text starting at end
* @throws IllegalArgumentException if end is not in text
*/
protected String removeTo(String text, String end) {
    int index = text.indexOf(end);
    if (index < 0) throw new IllegalArgumentException(
        "removeTo() failure because end '" + end +
        "' is not in text.");

    return text.substring(index).trim();
}
/**
* Removes all in text up to and including end. The result is
* trimmed, and is "" when end is the last thing in the text.
*
* @param text the text to shorten
* @param end the characters to remove up to and including
* @return the trimmed text following end
* @throws IllegalArgumentException if end is not in text
*/
protected String removeIncluding(String text, String end) {
    int index = text.indexOf(end);
    if (index < 0) throw new IllegalArgumentException(
        "removeIncluding() failure because end '" + end +
        "' is not in text. " + text);

    return text.substring(index + end.length()).trim();
}
/**
* Returns the text before end, untrimmed.
*
* @param text the text to read from
* @param end the characters to read up to
* @return the text preceding the first occurrence of end
* @throws IllegalArgumentException if end is not in text
*/
protected String readTo(String text, String end) {
    int index = text.indexOf(end);
    if (index < 0) throw new IllegalArgumentException(
        "readTo() failure because end '" + end +
        "' is not in text '" + text + "'.");
    return text.substring(0, index);
}
//---------- Standard --------------------------------------------
/**
* Debug helper, prints the text prefixed with the class name. Not
* called anywhere in this class at present.
*/
private static void print(String text) {
    System.out.println("Parser" + text);
}

} // End class