package uhr.core.tron;

import java.util.StringTokenizer;

/**
* This class parses XML into a data structure. Only the very
* simplest aspects of XML are supported. Features are:
*
* <BLOCKQUOTE>
* 1. Duplicate tags are permitted directly within an element.
* <p>
* 2. Only basic tags, comments and "&lt;?" are supported. A basic
* tag is of the format
* &lt;TagName&gt;data or elements&lt;/TagName&gt;.
* Empty tags like &lt;TagName/&gt; are not supported.
* Attributes are not supported. Entities are not yet implemented.
* Comments and "&lt;?" elements may appear anywhere in the text;
* they are removed before the remaining text is converted.
* <p>
* 3. Cannot mix character data and elements within an element.
* After all, an Infotron node can only be a value (data) or a
* container (element with children).
* <p>
* 4. Empty elements are provided with a null value, not an empty
* String.
* <p>
* 5. All lines are trimmed and then one space is added at the end
* to separate possible words. If you want to delimit lines of
* character data, we suggest a semi-colon at the end.
* <p>
* 6. Text that cannot be parsed (missing or mismatched end tag,
* unterminated comment, empty tag, no start tag) is rejected with
* an IllegalArgumentException.
* </BLOCKQUOTE>
*
* A very small parser such as this one has the advantage of allowing
* the core to be parameter driven in an embedded environment. We
* expect to eventually use a third party parser as needs arise,
* the industry/technology settles down, and they become more
* appropriate for our needs.
* <p>
* The unit test is: java -nojit uhr.core.test.SimpleParserXML_Test1
*
* @author Jack Harich
*/
public class SimpleParserXML {

//---------- Public Methods --------------------------------------
/**
* Parses the text into an Infotron. See class documentation.
* <p>
* The lines of the text are trimmed and joined with single spaces,
* skip elements (comments and "&lt;?") are removed, and the first
* element found is converted into a new InfotronStd. Any text
* remaining after that element's end tag is ignored.
*
* @param text the XML text; lines may end with \n, \r\n or \r.
* @return a new Infotron holding the converted element.
* @throws IllegalArgumentException if the text is not parsable.
* @throws NullPointerException if text is null.
*/
public Infotron parse(String text) {

    // Convert lines to a single string. A single space is added per
    // line after trim to allow word separation. Both \r and \n are
    // delimiters so that every common line ending is recognized.
    StringBuffer buffer = new StringBuffer();
    StringTokenizer lines = new StringTokenizer(text, "\r\n");
    while (lines.hasMoreTokens() ) {
        buffer.append(lines.nextToken().trim());
        buffer.append(" ");
    }
    // Trim the joined text: a whitespace-only first line would
    // otherwise leave a leading space in front of the first tag,
    // and convert() requires the text to start with "<".
    text = removeSkipElements(buffer.toString()).trim();

    // Convert text to Infotron and we're done
    Infotron tron = new InfotronStd();
    convert(text, tron);

    return tron;
}
//---------- Protected Methods -----------------------------------
/**
* Removes all occurrences of elements to be skipped, which are
* "&lt;? ... ?&gt;" elements and "&lt;!-- ... --&gt;" comments.
* Only the skipped elements themselves are removed; the text
* before, between and after them is kept. The result is trimmed.
*
* @param text the text to remove the skip elements from.
* @return the trimmed text without skip elements.
* @throws IllegalArgumentException if a skip element is started
*         but its terminator is not in the text.
*/
protected String removeSkipElements(String text) {
    StringBuffer kept = new StringBuffer();
    int from = 0; // Index of the first character not yet copied

    while (true) {
        int instruction = text.indexOf("<?", from);
        int comment = text.indexOf("<!--", from);
        if (instruction < 0 && comment < 0) break;

        // Whichever skip element opens first is removed first, so
        // a "<?" inside a comment (or a "<!--" inside a "<?"
        // element) is not mistaken for the start of an element.
        int start;
        String startMark;
        String endMark;
        if (comment < 0 || (instruction > -1 && instruction < comment)) {
            start = instruction;
            startMark = "<?";
            endMark = "?>";
        } else {
            start = comment;
            startMark = "<!--";
            endMark = "-->";
        }
        int end = text.indexOf(endMark, start + startMark.length());
        if (end < 0) throw new IllegalArgumentException(
            "removeSkipElements() failure because end '" + endMark +
            "' is not in text. " + text.substring(start));

        // Keep what precedes the skip element, then jump past it
        kept.append(text.substring(from, start));
        from = end + endMark.length();
    }
    kept.append(text.substring(from));
    return kept.toString().trim();
}
/**
* Converts the text by parsing and putting into the tron. The text
* starts with the start tag. Conversion stops when the end tag is
* reached. The unparsed text past the end tag is returned.
* <p>
* An element with nothing between its tags is added with a null
* value. Character data is added as a trimmed String. Child
* elements are converted recursively into a new InfotronStd that
* is added to the parent under the start tag's key.
* <p>
* Skip elements like "&lt;?" and comments are not handled here;
* they must have been removed already, see removeSkipElements().
*
* @param text the text, starting with the element's start tag.
* @param parentTron the tron the converted element is added to.
* @return the trimmed text following the element's end tag.
* @throws IllegalArgumentException if the text does not start
*         with a start tag, the tag is an empty tag, or the
*         matching end tag is missing or mismatched.
*/
protected String convert(String text, Infotron parentTron) {

    // Get startKey, remove start tag, create endTag
    String startKey = readStartTagKey(text);
    if (startKey.startsWith("/")) throw new IllegalArgumentException(
        "convert() failure because of unexpected end tag '<" +
        startKey + ">'.");
    if (startKey.endsWith("/")) throw new IllegalArgumentException(
        "convert() failure because empty tag '<" + startKey +
        ">' is not supported.");
    text = removeIncluding(text, "<" + startKey + ">");
    String endTag = "</" + startKey + ">";

    // Handle "no value" case
    if (text.startsWith(endTag)) {
        parentTron.add(startKey, null);
        return removeIncluding(text, endTag);
    }
    // Container for child elements, created on first child
    Infotron myTron = null;

    while (! text.startsWith(endTag)) {

        // Fail early and clearly rather than recursing into an end
        // tag or running off the end of the text.
        if (text.length() == 0) throw new IllegalArgumentException(
            "convert() failure because end tag '" + endTag +
            "' is not in text.");
        if (text.startsWith("</")) throw new IllegalArgumentException(
            "convert() failure because end tag '" + endTag +
            "' was expected but found: " + text);

        // Text starts with "<" or is data
        if (text.startsWith("<")) {
            // Create myTron if needed
            if (myTron == null) {
                // Clone and removeAll may be safer than new
                myTron = new InfotronStd();
                parentTron.add(startKey, myTron);
            }
            // RECURSE, children go into myTron
            text = convert(text, myTron);

        } else {
            // Data for the startKey, up to the next tag
            String value = readTo(text, "<").trim();
            text = removeTo(text, "<");
            // Mixing data and elements within an element is not
            // supported. NOTE(review): the original author expected
            // this add to overwrite an earlier entry for startKey;
            // confirm against Infotron.add().
            parentTron.add(startKey, value);
        }

    } // End while

    // Remove endTag
    return removeIncluding(text, endTag);
}
//---------- Helper Methods
// All text mutators do a trim()

/**
* Returns the key of the tag at the beginning of the text, which is
* everything between the leading "&lt;" and the first "&gt;".
*
* @throws IllegalArgumentException if the text does not start with
*         "&lt;" or contains no "&gt;".
*/
protected String readStartTagKey(String text) {
    int index = text.indexOf(">");
    if (! text.startsWith("<") || index < 0) {
        throw new IllegalArgumentException(
            "readStartTagKey() failure because text does not start " +
            "with a tag. " + text);
    }
    return text.substring(1, index);
}
/**
* Returns the trimmed text starting at the first occurrence of end,
* so the result begins with end itself. This also holds when end is
* at the very end of the text.
*
* @throws IllegalArgumentException if end is not in the text.
*/
protected String removeTo(String text, String end) {
    int index = text.indexOf(end);
    if (index < 0) throw new IllegalArgumentException(
        "removeTo() failure because end '" + end +
        "' is not in text.");

    // substring() accepts any index up to length(), no guard needed
    return text.substring(index).trim();
}
/**
* Returns the trimmed text following the first occurrence of end.
* The result is an empty String if end is at the very end of the
* text.
*
* @throws IllegalArgumentException if end is not in the text.
*/
protected String removeIncluding(String text, String end) {
    int index = text.indexOf(end);
    if (index < 0) throw new IllegalArgumentException(
        "removeIncluding() failure because end '" + end +
        "' is not in text. " + text);

    // substring() accepts any index up to length(), no guard needed
    return text.substring(index + end.length()).trim();
}
/**
* Returns the text preceding the first occurrence of end. The
* result is not trimmed.
*
* @throws IllegalArgumentException if end is not in the text.
*/
protected String readTo(String text, String end) {
    int index = text.indexOf(end);
    if (index < 0) throw new IllegalArgumentException(
        "readTo() failure because end '" + end +
        "' is not in text.");
    return text.substring(0, index);
}
//---------- Standard --------------------------------------------
// Prints the text to standard output, prefixed with the class name.
private static void print(String text) {
    System.out.println("SimpleParserXML" + text);
}

} // End class