import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.Comparator; import java.util.Arrays; import java.io.FileReader; import java.io.BufferedReader; public class WebStat { public static class IntVal { private int iv; public IntVal(int iv){ this.iv = iv; } public void add(int c){ iv += c; } public int intValue(){ return iv; } } public static void incCount(HashMap hm, String key){ if (hm.containsKey(key)){ IntVal count = hm.get(key); count.add(1); } else { hm.put(key, new IntVal(1)); } } public static void print_top20(String label, HashMap h){ Map.Entry[] entries = (Map.Entry[])new Map.Entry[h.size()]; entries = h.entrySet().toArray(entries); Arrays.sort(entries, new Comparator >(){ public int compare(Map.Entry a, Map.Entry b){ return -1*(a.getValue().intValue() - b.getValue().intValue()); } }); System.out.println("------------ " + label + " -------------"); for (int i = 0; i < 20; i++){ System.out.printf("%2d. %s (%d)\n", i, entries[i].getKey(), entries[i].getValue().intValue()); } System.out.println(); } public static void main(String[] args) throws Exception { if (args.length < 1){ System.out.println("Usage:: [ruby] webstat.rb "); System.exit(0); } BufferedReader br = new BufferedReader(new FileReader(args[0])); long t1 = System.currentTimeMillis(); int nlines = 0; Date start_date = null, end_date = null; LogEntry le = null; HashMap hosts = new HashMap(); HashMap urls = new HashMap(); HashMap referrers = new HashMap(); HashMap uastrings = new HashMap(); String line; long st = System.currentTimeMillis(); while ((line = br.readLine()) != null){ try { le = new LogEntry(line); if (start_date == null) start_date = le.getDate(); incCount(hosts, le.getHost()); incCount(urls, le.getURL()); incCount(referrers, le.getReferrer()); incCount(uastrings, le.getUa()); } catch (Exception exc){ System.out.println("Log entry parse failed at line: " + (nlines + 1) + ", error: " + exc); System.out.println("LINE: " + line); } nlines += 1; if (nlines % 4096 == 0){ long et = System.currentTimeMillis(); System.out.println("processed " + nlines + " lines ... (" + (et - st)/1000.0 + " seconds)"); st = et; } } end_date = le.getDate(); long t2 = System.currentTimeMillis(); System.out.printf("start_date:%s, end_date:%s\n", start_date, end_date); System.out.printf("lines:%d, hosts:%d, urls:%d, referrers:%d, uastrings:%d\n", nlines, hosts.size(), urls.size(), referrers.size(), uastrings.size()); System.out.println("Processing time : " + (t2 - t1)/1000.0 + " seconds"); //System.err.println("Hit Enter to continue ..."); //System.in.read(); //System.in.skip(32); // Do the sorting and display of top 20 t1 = System.currentTimeMillis(); print_top20("Top 20 Hosts", hosts); print_top20("Top 20 URLs", urls); print_top20("Top 20 Referrers", referrers); print_top20("Top 20 UA Strings", uastrings); t2 = System.currentTimeMillis(); System.out.println("Sort and Display time : " + (t2 - t1)/1000.0 + " seconds"); //System.err.println("Hit Enter to continue ..."); //System.in.read(); } }