Linguistics 473
Computational Linguistics Fundamentals
Summer 2017

Project 3


Project Description

Finite State Transducer (FST) Using a switch statement

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace LING473Project3
{
    /// <summary>
    /// Command-line driver: reads the input file line by line, runs every line
    /// through <see cref="FST.Breaker"/> and writes the results to the output file.
    /// </summary>
    class Project3
    {
        /// <summary>Program entry point.</summary>
        /// <param name="args">args[0] is the input path, args[1] is the output path.</param>
        static void Main(string[] args)
        {
            // Guard the positional arguments instead of crashing with an
            // IndexOutOfRangeException when the program is started without them.
            if (args == null || args.Length < 2)
            {
                Console.Error.WriteLine("usage: proj3.exe <input-file> <output-file>");
                Environment.ExitCode = 1;
                return;
            }

            FST fst = new FST();
            using (StreamWriter sw = new StreamWriter(args[1]))
            {
                // NOTE(review): the empty header/footer lines and the per-line suffix
                // look like they lost markup during text extraction (the run
                // instructions name the output file "fsm-output.html") - confirm
                // against the original handout before relying on the exact output.
                sw.WriteLine("");
                foreach (String line in File.ReadAllLines(args[0]))
                {
                    sw.WriteLine(fst.Breaker(line) + "\n");
                }
                sw.WriteLine("");
            }
        }
    }

    /// <summary>
    /// Finite state transducer, implemented with a switch statement, that copies
    /// its input to its output and inserts a space wherever the machine reaches
    /// one of its "break" states (7, 8 or 9).
    /// </summary>
    class FST
    {
        // Character classes tested by the transitions below.
        readonly HashSet<char> V1 = new HashSet<char>("เแโใไ");
        readonly HashSet<char> C1 = new HashSet<char>("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮ");
        readonly HashSet<char> C2 = new HashSet<char>("รลวนม");
        readonly HashSet<char> V2 = new HashSet<char>("ิีึืุูั็");
        // Tone marks U+0E48..U+0E4B.
        readonly HashSet<char> T = new HashSet<char> { '\u0E48', '\u0E49', '\u0E4A', '\u0E4B' };
        readonly HashSet<char> V3 = new HashSet<char>("าอยว");
        readonly HashSet<char> C3 = new HashSet<char>("งนมดบกยว");

        // State entered when no transition matches the current character.
        const int Fail = -1;

        /// <summary>
        /// Runs the transducer over one line of text.
        /// </summary>
        /// <param name="input">The line to segment.</param>
        /// <returns>
        /// The input with spaces inserted at the detected break points, or the
        /// literal text "failed to parse" when a character has no valid transition.
        /// A break that is still pending (state 7, 8 or 9) when the input ends is
        /// not applied.
        /// </returns>
        public String Breaker(string input)
        {
            int state = 0;
            StringBuilder output = new StringBuilder();

            for (int i = 0; i < input.Length; i++)
            {
                char c = input[i];

                switch (state)
                {
                    case 0:
                        output.Append(c);
                        if (V1.Contains(c)) state = 1;
                        else if (C1.Contains(c)) state = 2;
                        else state = Fail;
                        break;

                    case 1:
                        output.Append(c);
                        if (C1.Contains(c)) state = 2;
                        else state = Fail;
                        break;

                    case 2:
                        output.Append(c);
                        if (C2.Contains(c)) state = 3;
                        else if (V2.Contains(c)) state = 4;
                        else if (T.Contains(c)) state = 5;
                        else if (V3.Contains(c)) state = 6;
                        else if (C3.Contains(c)) state = 9;
                        else if (V1.Contains(c)) state = 7;
                        else if (C1.Contains(c)) state = 8;
                        else state = Fail;
                        break;

                    case 3:
                        output.Append(c);
                        if (V2.Contains(c)) state = 4;
                        else if (T.Contains(c)) state = 5;
                        else if (V3.Contains(c)) state = 6;
                        else if (C3.Contains(c)) state = 9;
                        else state = Fail;
                        break;

                    case 4:
                        output.Append(c);
                        if (T.Contains(c)) state = 5;
                        else if (V3.Contains(c)) state = 6;
                        else if (C3.Contains(c)) state = 9;
                        else if (V1.Contains(c)) state = 7;
                        else if (C1.Contains(c)) state = 8;
                        else state = Fail;
                        break;

                    case 5:
                        output.Append(c);
                        if (V3.Contains(c)) state = 6;
                        else if (C3.Contains(c)) state = 9;
                        else if (V1.Contains(c)) state = 7;
                        else if (C1.Contains(c)) state = 8;
                        else state = Fail;
                        break;

                    case 6:
                        output.Append(c);
                        if (C3.Contains(c)) state = 9;
                        else if (V1.Contains(c)) state = 7;
                        else if (C1.Contains(c)) state = 8;
                        else state = Fail;
                        break;

                    case 7:
                        // The character appended last starts a new unit: put the
                        // break in front of it and continue as if it had been read
                        // in state 0 as a V1 character.
                        state = 1;
                        output.Insert(output.Length - 1, " ");
                        i--; // don't consume an input
                        break;

                    case 8:
                        // Same as state 7, but the character appended last was a
                        // C1 character, so processing resumes in state 2.
                        state = 2;
                        output.Insert(output.Length - 1, " ");
                        i--; // don't consume an input
                        break;

                    case 9:
                        // The character appended last closes the current unit:
                        // break after it and start over with the current character.
                        state = 0;
                        output.Append(" ");
                        i--; // don't consume an input
                        break;
                }

                // No transition leaves the fail state, so the result is already
                // decided; stop scanning the rest of the line.
                if (state == Fail)
                    return "failed to parse";
            }

            return output.ToString();
        }
    }
}
To compile on patas:
$ gmcs proj3.cs
$ mono proj3.exe /dropbox/17-18/473/project3/fsm-input.utf8.txt fsm-output.html