import structure5.*;
import java.io.FileInputStream;
import java.util.Scanner;

/**
 * The purpose of this program is to investigate hash function
 * quality, notably uniformity.  We look at three hash functions
 * for the String data type.
 *
 * 1. using the ASCII value of the first letter of the string.
 * 2. using the ASCII value of the last letter of the string.
 * 3. using the sum of the ASCII values of all of the letters
 *    in the string.
 *
 * A "good" hash function will distribute values uniformly
 * in an array.  Since we're really only considering a-z
 * here, we will see how uniformly we can hash strings to
 * 26 buckets.  A perfect hash would uniformly distribute
 * values to all 26 buckets.  That doesn't happen with these
 * but one of them gets surprisingly close!
 */

class LetterFreq {
    /* This index function is the ASCII value of the
     * first letter mod 26.
     */
    public static int indexLastChar(String word) {
      return (int)word.charAt(word.length() - 1) % 26;
    }

    /* This index function is the ASCII value of the
     * last letter mod 26.
     */
    public static int indexFirstChar(String word) {
      return (int)word.charAt(0) % 26;
    }

    /* This index function is the sum of the ASCII values
     * of all the letters mod 26.
     */
    public static int indexAllChars(String word) {
      int sum = 0;
      for (int i = 0; i < word.length(); i++) {
        sum += (int)word.charAt(i);
      }
      return sum % 26;
    }

    public static String makeHistogram(Map<Integer,Integer> counts) {
      String s = "";
      for (Integer key: counts.keySet()) {
        s += key + ": " + counts.get(key) + "\n";
      }
      return s;
    }

    /* The purpose of this program is to explore whether
     * any of the following three hash functions are "good"
     * when hashing common (US) English words:
     * 1. The first letter of the word.
     * 2. The last letter of the word.
     * 3. The sum of all of the letters of the word.
     */
    public static void main(String[] args) {
    Map<Integer,Integer> first_counts = new Hashtable<>();
    Map<Integer,Integer> last_counts = new Hashtable<>();
    Map<Integer,Integer> sum_counts = new Hashtable<>();

    try {
        Scanner input = new Scanner(new FileInputStream("google-10000-english-usa.txt"));

        while(input.hasNext()) {
          // get next word
          String word = input.next().toLowerCase();
      
          // get last letter of word
          int last = indexLastChar(word);

          // get first letter of word
          int first = indexFirstChar(word);

          // get sum of all letters
          int hval = indexAllChars(word);

          // count first
          if (!first_counts.containsKey(first)) {
            first_counts.put(first, 0);
          }
          first_counts.put(first, first_counts.get(first) + 1);

          // count last
          if (!last_counts.containsKey(last)) {
            last_counts.put(last, 0);
          }
          last_counts.put(last, last_counts.get(last) + 1);
    
          // count sum
          if (!sum_counts.containsKey(hval)) {
              sum_counts.put(hval, 0);
          }
          sum_counts.put(hval, sum_counts.get(hval) + 1);
        }
    } catch (Exception e) {
        // quit
        System.out.println("Can't find input!");
        System.exit(1);
    }

    // first letter distribution
    System.out.println("First letter as hash:");
    System.out.println(makeHistogram(first_counts));

    // last letter distribution
    System.out.println("Last letter as hash:");
    System.out.println(makeHistogram(last_counts));
  
    // sum value distribution
    System.out.println("Sum of letters as hash:");
    System.out.println(makeHistogram(sum_counts));
   }
}