#!/usr/bin/env python
# -*- coding: utf8 -*-
# :Copyright: © 2008 Günter Milde.
#             Released  without warranties or conditions of any kind
#             under the terms of the Apache License, Version 2.0
#             http://www.apache.org/licenses/LICENSE-2.0
# :Id: $Id:  $

# ===========================================================================

# Parse the Unicode math classes file.

import sys, re, difflib, unicodedata
import parse_unimathsymbols

# Existing symbol table; it is indexed by code point below (``data[number]``).
data = parse_unimathsymbols.read_data()

# Merge the math class of every code point listed in the math classes
# reference file into `data`.  The ``with`` statement guarantees that the
# reference file is closed again, also when a malformed line aborts the run.
with open('../references/MathClassEx-14.txt', 'r') as infile:
    for line in infile:
        # skip comment lines
        if line.startswith('#'):
            continue
        # parse lines into fields: a data line has exactly seven
        # ';'-separated fields, anything else fails the unpacking below
        fields = [field.strip() for field in line.split(';')]
        try:
            (No,
             math_class,
             utf8,
             entity_name,
             entity_set,
             comments,
             name) = fields
        except ValueError:
            # blank lines are silently ignored, everything else is an error
            if not line.strip():
                continue
            print("error in line: '%s'" % line)
            raise

        # expand ranges: "XXXX..YYYY" (hexadecimal) includes both end points
        numbers = [int(n, 16) for n in No.split('..')]
        if len(numbers) == 2:
            numbers = range(numbers[0], numbers[1]+1)

        for number in numbers:
            try:
                entry = data[number]
            except KeyError:
                # code point not yet in the table: start a fresh entry
                try:
                    entry = parse_unimathsymbols.new_entry(number)
                except ValueError: # non existent Unicode char in range
                    continue
                # Only fresh entries get their comment filled from the
                # reference file; existing entries keep their comment.
                if entity_name:
                    entry.comment = entity_name
                if comments and 'compatibility variant' not in comments:
                    entry.comment += ' ' + comments

            entry.math_class = math_class
            # push back to data
            data[number] = entry


# Write back
# ----------

header = parse_unimathsymbols.read_header()

# Test for differences after a read-write cycle. Whitespace adjacent to the
# delimiter is not significant. ::

# Read the current data file and normalise blanks/tabs around the '^'
# delimiter; the ``with`` statement closes the file right after reading.
with open(parse_unimathsymbols.datafilename, 'r') as datafile:
    in_lines = [re.sub(r'[ \t]*\^[ \t]*', '^', line) for line in datafile]

# NOTE(review): the header is normalised for spaces only, while the data
# lines above are also stripped of tabs -- confirm that this is intended.
header = [re.sub(r' *\^ *', '^', line) for line in header]

# print "header", "".join(header)

# One output line per item of `data`; only the second element of each
# (key, value) pair is used, rendered via ``str()``.
out_lines = [str(v)+'\n' for (k,v) in data]

# Compare the file as read with what a write-back would produce
# (normalised header followed by the regenerated data lines).
diff = ''.join(difflib.unified_diff(in_lines, header + out_lines,
                                    parse_unimathsymbols.datafilename,
                                    '*round trip*'))
if diff:
    print(diff)
else:
    print('no differences after round trip')

# Write back to outfile::

# Output target: leave at None to skip the write-back (dry run), or enable
# one of the alternatives below.
outfile = None
# outfile = sys.stdout
# outfile = open('../data/unimathsymbols.txt', 'w')
if outfile:
    data.header = header
    parse_unimathsymbols.write_data(data, outfile)
    if outfile is not sys.stdout:
        # Close the file we opened ourselves so the data is flushed to disk
        # before success is reported; stdout is left open.
        outfile.close()
        print("Output written to %s" % outfile.name)


# for (key, entry) in sort_by_command(data):
#     print entry