/* createPYMB.c - fcitx source listing captured from a Doxygen-generated package browser page. */


/***************************************************************************
 *   Copyright (C) 2002~2005 by Yuking                                     *
 *   yuking_net@sohu.com                                                   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PARSE_INPUT_SYSTEM ' '

#include "pyParser.h"
#include "pyMapTable.h"
#include "PYFA.h"
#include "sp.h"

/* Pinyin table scanned by CreatePYBase(); it is only declared here, not defined in this file. */
extern PYTABLE  PYTable[];

/* Streams opened by main() and consumed by the Create* passes. */
FILE           *fps;            /* argv[1], read by CreatePYBase() */
FILE           *fpt;            /* argv[2], read by CreatePYPhrase() */
FILE           *fp1;            /* "pybase.mb", written by CreatePYBase() */
FILE           *fp2;            /* "pyphrase.mb", written by CreatePYPhrase() */

/*
 * Not referenced anywhere in this file.
 * NOTE(review): presumably required by the parser sources this tool links
 * against - confirm before removing.
 */
Bool            bSingleHZMode = False;
Bool            bFullPY = False;

/*
 * Node of the circular, doubly linked list that CreatePYBase() builds while
 * reading the character table.  The list is kept ordered by strPY.
 */
typedef struct _PY {
    char            strPY[3];   /* two-character map code produced by MapPY(), NUL terminated */
    char            strHZ[3];   /* the character as read from the input, NUL terminated */
    struct _PY     *next, *prev;
} _PyStruct;

/*
 * One phrase in the singly linked list attached to a base character.
 * The first character of the phrase is implied by the owning _PyBase, so
 * both strings hold only what follows it.
 */
typedef struct _PyPhrase {
    char           *strPhrase;  /* phrase text after its first two bytes */
    char           *strMap;     /* map code after its first two characters */
    struct _PyPhrase *next;
    unsigned int    uIndex;     /* sequence number assigned when the phrase was inserted */
} _PyPhrase;

/*
 * One character of a map-code group, together with the phrases that start
 * with it.
 */
typedef struct _PyBase {
    char            strHZ[3];       /* two-byte character, NUL terminated */
    struct _PyPhrase *phrase;       /* dummy head node; real phrases start at phrase->next */
    int             iPhraseCount;   /* number of real phrases in the list */
    unsigned int    iIndex;         /* index assigned by LoadPY() */
} _PyBase;

/*
 * One group of characters that share the same two-character map code.
 */
typedef struct {
    char            strMap[3];  /* two-character map code, NUL terminated */
    struct _PyBase *pyBase;     /* array of iHZCount characters */
    int             iHZCount;   /* number of entries in pyBase */
} __PYFA;

/* Group table filled by LoadPY(), and the number of groups it holds. */
__PYFA         *PYFAList;
int             iPYFACount;

/* Number of characters accepted by CreatePYBase(); LoadPY() counts it down while assigning indices. */
int             iAllCount;

/* Per-PYTable-entry hit counters used by CreatePYBase() to count groups. */
int             YY[1000];

Bool LoadPY (void)
{
    FILE           *fp;
    int             i, j;
    int             iSW;;

    fp = fopen ("pybase.mb", "rb");
    if (!fp)
      return False;

    fread (&iPYFACount, sizeof (int), 1, fp);
    PYFAList = (__PYFA *) malloc (sizeof (__PYFA) * iPYFACount);
    for (i = 0; i < iPYFACount; i++) {
      fread (PYFAList[i].strMap, sizeof (char) * 2, 1, fp);
      PYFAList[i].strMap[2] = '\0';
      fread (&(PYFAList[i].iHZCount), sizeof (int), 1, fp);
      PYFAList[i].pyBase = (_PyBase *) malloc (sizeof (_PyBase) * PYFAList[i].iHZCount);
      for (j = 0; j < PYFAList[i].iHZCount; j++) {
          fread (PYFAList[i].pyBase[j].strHZ, sizeof (char) * 2, 1, fp);
          PYFAList[i].pyBase[j].strHZ[2] = '\0';
          PYFAList[i].pyBase[j].phrase = (_PyPhrase *) malloc (sizeof (_PyPhrase));
          PYFAList[i].pyBase[j].phrase->next = NULL;
          PYFAList[i].pyBase[j].iPhraseCount = 0;
      }
    }

    fclose (fp);

    i = 0;

    while (1) {
      iSW = 0;
      for (j = 0; j < iPYFACount; j++) {
          if (i < PYFAList[j].iHZCount) {
            PYFAList[j].pyBase[i].iIndex = iAllCount--;
            iSW = 1;
          }
      }
      if (!iSW)
          break;
      i++;
    }

    fp = fopen ("pybase.mb", "wb");
    if (!fp)
      return False;

    fwrite (&iPYFACount, sizeof (int), 1, fp);
    for (i = 0; i < iPYFACount; i++) {
      fwrite (PYFAList[i].strMap, sizeof (char) * 2, 1, fp);
      fwrite (&(PYFAList[i].iHZCount), sizeof (int), 1, fp);
      for (j = 0; j < PYFAList[i].iHZCount; j++) {
          fwrite (PYFAList[i].pyBase[j].strHZ, sizeof (char) * 2, 1, fp);
          fwrite (&(PYFAList[i].pyBase[j].iIndex), sizeof (int), 1, fp);
      }
    }

    fclose (fp);

    return True;
}

void CreatePYPhrase (void)
{
    char            strPY[256];
    char            strPhrase[256];
    char            strMap[256];
    ParsePYStruct   strTemp;
    int             iIndex, i, s1, s2, j, k;
    _PyPhrase      *phrase, *t, *tt;
    FILE           *f = fopen ("pyERROR", "wt");
    FILE           *fg = fopen ("pyPhrase.ok", "wt");
    int             kkk;
    unsigned int    uIndex, uTemp;

    s1 = 0;
    s2 = 0;
    uIndex = 0;
    while (!feof (fpt)) {
      fscanf (fpt, "%s", strPY);
      fscanf (fpt, "%s\n", strPhrase);
      if (strlen (strPhrase) < 3)
          continue;

      ParsePY (strPY, &strTemp, PY_PARSE_INPUT_SYSTEM);
      s2++;
      kkk = 0;
      //printf("%s  %s  %s   %d\n",strPY,strPhrase,strTemp.strMap,strTemp.iHZCount);
      if (strTemp.iHZCount != strlen (strPhrase) / 2 || (strTemp.iMode & PARSE_ABBR)) {
          //if ( strlen(strPhrase)==4 )
          fprintf (f, "%s %s\n", strPY, strPhrase);   //"%s %s %s\n", strPY, strPhrase, strTemp.strPYParsed);
          continue;
      }

      strMap[0] = '\0';
      for (iIndex = 0; iIndex < strTemp.iHZCount; iIndex++)
          strcat (strMap, strTemp.strMap[iIndex]);

      for (iIndex = 0; iIndex < iPYFACount; iIndex++) {
          if (!strncmp (PYFAList[iIndex].strMap, strMap, 2)) {
            for (i = 0; i < PYFAList[iIndex].iHZCount; i++) {
                if (!strncmp (PYFAList[iIndex].pyBase[i].strHZ, strPhrase, 2)) {
                  t = PYFAList[iIndex].pyBase[i].phrase;
                  for (j = 0; j < PYFAList[iIndex].pyBase[i].iPhraseCount; j++) {
                      tt = t;
                      t = t->next;
                      if (!strcmp (t->strMap, strMap + 2) && !strcmp (t->strPhrase, strPhrase + 2)) {
                        printf ("%d: %s %s ----->deleted.\n", s2, strPY, strPhrase);
                        goto _next;
                      }
                      if (strcmp (t->strMap, strMap + 2) > 0) {
                        t = tt;
                        break;
                      }
                  }

                  phrase = (_PyPhrase *) malloc (sizeof (_PyPhrase));
                  phrase->strPhrase = (char *) malloc (sizeof (char) * (strlen (strPhrase) - 1));
                  phrase->strMap = (char *) malloc (sizeof (char) * (strTemp.iHZCount * 2 - 1));
                  phrase->uIndex = uIndex++;
                  strcpy (phrase->strPhrase, strPhrase + 2);
                  strcpy (phrase->strMap, strMap + 2);

                  tt = t->next;
                  t->next = phrase;
                  phrase->next = tt;
                  PYFAList[iIndex].pyBase[i].iPhraseCount++;
                  s1++;
                  kkk = 1;
                  _next:
                  ;
                }
            }
          }
      }
      if (!kkk)
          fprintf (f, "%s %s %s\n", strPY, strPhrase, (char *) (strTemp.strPYParsed));
      else
          fprintf (fg, "%s %s\n", strPY, strPhrase);
    }
//    printf ("\n共读入 %d 个词组,成功转换 %d 个!\n正在写入词组文件……  ", s2, s1);
    for (i = 0; i < iPYFACount; i++) {
      for (j = 0; j < PYFAList[i].iHZCount; j++) {
          iIndex = PYFAList[i].pyBase[j].iPhraseCount;
          if (iIndex) {
            fwrite (&i, sizeof (int), 1, fp2);
            fwrite (PYFAList[i].pyBase[j].strHZ, sizeof (char) * 2, 1, fp2);
            
            fwrite (&iIndex, sizeof (int), 1, fp2);
            t = PYFAList[i].pyBase[j].phrase->next;
            for (k = 0; k < PYFAList[i].pyBase[j].iPhraseCount; k++) {
                iIndex = strlen (t->strMap);
                fwrite (&iIndex, sizeof (int), 1, fp2);
                fwrite (t->strMap, sizeof (char), iIndex, fp2);
                fwrite (t->strPhrase, sizeof (char), strlen (t->strPhrase), fp2);
                uTemp = uIndex - 1 - t->uIndex;
                fwrite (&uTemp, sizeof (unsigned int), 1, fp2);
                t = t->next;
            }
          }
      }
    }
    printf ("\nOK!\n");
    
    fclose (fp2);
    fclose (fpt);
}

void CreatePYBase (void)
{
    _PyStruct      *head, *pyList, *temp, *t;
    char            strPY[7], strHZ[3], strMap[3];
    int             iIndex, iCount, i;
    int             iBaseCount;
    int             s = 0;
    int             tt = 0;

    head = (_PyStruct *) malloc (sizeof (_PyStruct));
    head->prev = head;
    head->next = head;

    iBaseCount = 0;
    while (PYTable[iBaseCount].strPY[0] != '\0')
      iBaseCount++;
    for (iIndex = 0; iIndex < iBaseCount; iIndex++)
      YY[iIndex] = 0;
    iIndex = 0;

    while (!feof (fps)) {
      fscanf (fps, "%s", strPY);
      fscanf (fps, "%s\n", strHZ);

      if (MapPY (strPY, strMap, PARSE_INPUT_SYSTEM)) {
          for (i = 0; i < iBaseCount; i++)
            if ((!strcmp (PYTable[i].strPY, strPY)) && PYTable[i].pMH == NULL)
                YY[i] += 1;
          iIndex++;
          temp = (_PyStruct *) malloc (sizeof (_PyStruct));
          strcpy (temp->strHZ, strHZ);
          strcpy (temp->strPY, strMap);
          pyList = head->prev;

          while (pyList != head) {
            if (strcmp (pyList->strPY, strMap) <= 0)
                break;
            pyList = pyList->prev;
          }

          temp->next = pyList->next;
          temp->prev = pyList;
          pyList->next->prev = temp;
          pyList->next = temp;
      }
      else
          printf ("%s Error!!!!\n", strPY);
    }

    iCount = 0;
    for (i = 0; i < iBaseCount; i++) {
      if (YY[i])
          iCount++;
    }

    fwrite (&iCount, sizeof (int), 1, fp1);
    printf ("Groups: %d\n", iCount);
    iAllCount = iIndex;

    pyList = head->next;

    strcpy (strPY, pyList->strPY);
    iCount = 0;
    t = pyList;

    while (pyList != head) {
      if (!strcmp (strPY, pyList->strPY)) {
          iCount++;
      }
      else {
          tt++;
          fwrite (strPY, sizeof (char) * 2, 1, fp1);
          fwrite (&iCount, sizeof (int), 1, fp1);
          for (i = 0; i < iCount; i++) {
            fwrite (t->strHZ, sizeof (char) * 2, 1, fp1);

            t = t->next;
          }
          s += iCount;
          t = pyList;
          iCount = 1;
          strcpy (strPY, pyList->strPY);
      }
      pyList = pyList->next;
    }
    fwrite (strPY, sizeof (char) * 2, 1, fp1);
    fwrite (&iCount, sizeof (int), 1, fp1);
    for (i = 0; i < iCount; i++) {
      fwrite (t->strHZ, sizeof (char) * 2, 1, fp1);
      t = t->next;
    }
    s += iCount;

    fclose (fp1);
    fclose (fps);
}

/*
 * Usage: createPYMB <pinyin-character file> <pinyin-phrase file>
 *
 * Builds "pybase.mb" from the first file and "pyphrase.mb" from the second,
 * both in the current directory.  The output files are only created once
 * both inputs have been opened, so a mistyped path does not truncate
 * existing tables.
 *
 * Returns 0 on success and 1 on a usage error, when a file cannot be opened,
 * or when the base table cannot be reloaded.
 */
int main (int argc, char *argv[])
{
    if (argc < 3) {
        fprintf (stderr, "Usage: %s <pinyin-character file> <pinyin-phrase file>\n",
                 (argc > 0 && argv[0]) ? argv[0] : "createPYMB");
        return 1;
    }

    fps = fopen (argv[1], "rt");
    if (!fps) {
        fprintf (stderr, "Cannot open %s\n", argv[1]);
        return 1;
    }
    fpt = fopen (argv[2], "rt");
    if (!fpt) {
        fprintf (stderr, "Cannot open %s\n", argv[2]);
        fclose (fps);
        return 1;
    }

    fp1 = fopen ("pybase.mb", "wb");
    fp2 = fopen ("pyphrase.mb", "wb");
    if (!fp1 || !fp2) {
        fprintf (stderr, "Cannot create %s\n", fp1 ? "pyphrase.mb" : "pybase.mb");
        if (fp1)
            fclose (fp1);
        if (fp2)
            fclose (fp2);
        fclose (fpt);
        fclose (fps);
        return 1;
    }

    CreatePYBase ();            /* closes fp1 and fps */
    if (!LoadPY ()) {
        fprintf (stderr, "Cannot reload pybase.mb\n");
        /* CreatePYPhrase() would have closed these two. */
        fclose (fp2);
        fclose (fpt);
        return 1;
    }
    CreatePYPhrase ();          /* closes fp2 and fpt */

    return 0;
}

/* Listing generated by Doxygen 1.6.0. */