尝试数据结构的实现…… 应用程序 - 字典

3

想要编写一个程序,使用Tries数据结构实现单词字典。

请告诉我实现的结构,以便我可以开始编写程序,因为我在互联网上没有找到任何有关Tries实现的文章。

困惑的是,当我们搜索单词时,只有当我们在叶节点通过单词时,才会存储单词的含义。但是,Tries数据结构中的所有节点都将是浪费的。即在每个内部节点中存储一个含义的变量......

因此,基本思想是,通过一个小例子来帮助存储字典,请告诉我Tries数据结构的结构..

并且请提供C程序实现..

谢谢..

压缩Tries.. 这给了我正确的压缩Trie,正如预期的那样,但还存在一些问题.... 并希望讨论一下....

1)首先我构建了一个简单的trie,然后使用函数trie_compress()进行了压缩,现在当我想要添加任何单词时,它会要求更改trie_add(),也更改trie_lookup(),好的,我将自己完成这个,只是想知道,我的方法是否正确或者是否有更好的方法。

2)在trie_new()中,我使用了t->substr = (char*)malloc(10); 这看起来不太高效,因为应该根据需要分配内存。我们能否改进这一点。

typedef struct trie
{
   int on;
   char *substr;
   struct trie *first_child;
   struct trie *next_sibling;
}trie;

trie* trie_new()
{
   trie *t = (trie*)malloc(sizeof(trie));
   t->substr = (char*)malloc(10);
   t->on = 0;
   t->substr[0] = '\0';
   t->first_child = NULL;
   t->next_sibling = NULL;

   return t;
}

trie* trie_at_level(trie *t, char c)
{
   while(t != NULL)
   {
      if(t->substr[0] == c)
      {
         return t;
      }
      t = t->next_sibling;
   }
   return NULL;
}

void trie_add(trie *t, const char *str)
{
   const int n = strlen(str);
   int i;

   for(i=0; i<n; i++)
   {
      const char c = str[i];
      trie* parent = t;

      t = t->first_child;
      t = trie_at_level(t,c);
      if(t == NULL)
      {
         t = trie_new();
         t->substr[0] = c;
         t->substr[1] = '\0';
         t->next_sibling = parent->first_child;
         parent->first_child = t;
      }
   }
   t->on = 1;
}

int trie_lookup(trie *t, const char *str)
{
   const int n = strlen(str);
   int i;

   for(i=0; i<n; i++)
   {
      const char c = str[i];
      t = t->first_child;
      t = trie_at_level(t,c);
      if(t == NULL)
         return 0;
   }
   return t->on;
}

void trie_compress(trie *t)
{
   trie* parent = t;
   t = t->first_child;

   if(t->first_child != NULL)
      trie_compress(t);

   if(t->next_sibling == NULL)
   {
      parent->substr = strcat(parent->substr,t->substr);
      parent->first_child = t->first_child;
      parent->on = t->first_child->on;
      free(t);

      return;
   }
   else
      trie_compress(t->next_sibling);
}

4
你可能想起昨天有人回答了你的问题:https://dev59.com/5U_Sa4cB1Zd3GeqP-j7N#3307833 - Matthew Slattery
2个回答

5

好的,我想这一次我做得没错。

压缩字典树:

#include <string.h>
#include <stdlib.h>

typedef struct trie {
  int value;
  char* key;
  struct trie* kids;
  struct trie* next;
} trie;

/* Creates an empty trie.
 */
trie* trie_new () {
  trie* t = (trie*) malloc (sizeof (trie));
  t->value = 0;
  t->key = NULL;
  t->kids = NULL;
  t->next = NULL;
  return t;
}

/* Sets |t->key| to |key|.
 */
static void trie_set_key (trie* t, const char* key) {
  char* key_copy = (char*) malloc (sizeof (char) * (strlen (key) + 1));
  strcpy (key_copy, key);
  free (t->key);
  t->key = key_copy;
}

/* Creates a trie with |->key| set to |key| whose |->value| is on.
 */
static trie* trie_new_init (const char* key) {
  trie* t = trie_new ();
  t->value = 1;
  trie_set_key (t, key);
  return t;
}

/* Frees all memory used by the trie |t|.
 */
void trie_delete (trie* t) {
  if (t == NULL) {
    return;
  }
  trie_delete (t->kids);
  trie_delete (t->next);
  free (t->key);
  free (t);
}

typedef struct trie_str_pair {
  trie* trie;
  const char* str;
} trie_str_pair;

/* Creates a trie_str_pair with the values |->trie| and |->str| set to
 *  |t| and |str|, respectively.
 */
static trie_str_pair mk_trie_str_pair (trie* t, const char* str) {
  trie_str_pair pair;
  pair.trie = t;
  pair.str = str;
  return pair;
}

/* Tries to find a sibling of |t| or |t| itself that matches the input
 *  choice function |choiceFunc|. A match occurs if |choiceFunc| returns
 *  a string other than NULL. Upon a match, the matching trie and the string
 *  are returned. Otherwise NULL values are returned in the pair struct.
 */
static trie_str_pair lookup_by (
      const char* (*choiceFunc)(const char*, trie*)
    , const char* key, trie* t
  ) {
  while (t != NULL) {
    const char* str = choiceFunc (key, t);
    if (str != NULL) {
      return mk_trie_str_pair (t, str);
    }
    t = t->next;
  }
  return mk_trie_str_pair (NULL, NULL);
}

/* If |prefix| is a prefix of |str|, returns a pointer into |str| immediately
 *  after the prefix.
 */
static const char* strip_prefix (const char* prefix, const char* str) {
  int i;
  for (i = 0; prefix [i] != '\0'; ++i) {
    if (str [i] != prefix [i]) {
      return NULL;
    }
  }
  return str + i;
}

/* If |t->key| is a prefix of |str|, returns a pointer into |str| immediately
 *  after the prefix.
 */
static const char* strip_prefix_with_key (const char* str, trie* t) {
  return strip_prefix (t->key, str);
}

/* If |str| is a prefix of |t->key|, returns a pointer into |t->key|
 *  immediately after the prefix.
 */
static const char* strip_prefix_from_key (const char* str, trie* t) {
  return strip_prefix (str, t->key);
}

/* Returns a pointer into |str1| immediately after the longest common prefix
 *  between |str1| and |str2|.
 */
static const char* strip_common_prefix (const char* str1, const char* str2) {
  int i;
  for (i = 0; str1 [i] != '\0' && str2 [i] != '\0'; ++i) {
    if (str1 [i] != str2 [i]) {
      break;
    }
  }
  if (i == 0) {
    return NULL;
  }
  return str1 + i;
}

/* Returns a pointer into |str| past the longest common prefix between
 *  |str| and |t->str|.
 */
static const char* strip_common_prefix_on_key (const char* str, trie* t) {
  return strip_common_prefix (str, t->key);
}

/* Returns 1 if |key| is in the trie |t|. Returns 0 if not.
 */
int trie_lookup (trie* t, const char* key) {
  while (key != NULL && key [0] != '\0') {
    trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
    t = pair.trie;
    if (t == NULL) {
      return 0;
    }
    key = pair.str;
  }
  return t->value;
}

/* Adds |kid| to |t|'s list of kids.
 */
static void trie_add_kid (trie* t, trie* kid) {
  kid->next = t->kids;
  t->kids = kid;
}

/* Removes |kid| from |t|'s list of kids.
 * |kid| must be in |t|'s list of kids.
 */
static void trie_remove_kid (trie* t, trie* kid) {
  if (t->kids == kid) {
    t->kids = kid->next;
  }
  else {
    t = t->kids;
    while (t->next != kid) {
      t = t->next;
    }
    t->next = kid->next;
  }
}

/* Replaces |kid| from |t|'s list of kids with |new_kid|.
 * |kid| must be in |t|'s list of kids.
 */
static void trie_replace_kid (trie* t, trie* kid, trie* new_kid) {
  trie_remove_kid (t, kid);
  trie_add_kid (t, new_kid);
}

/* If |t| has exactly one kid and no grandkids, |t| and its kid are merged
 *  into one trie node. In other words, |t|'s kid's |->key| is appended to
 *  |t->key| and |t->value| becomes that of its kid's |->value|.
 */
static void trie_try_merge_with_kids (trie* t) {
  if (t->key != NULL) {
    trie* kid = t->kids;
    if (kid != NULL && kid->next == NULL) {
      t->value = kid->value;
      t->kids = kid->kids;
      int new_len = strlen (t->key) + strlen (kid->key);
      t->key = realloc (t->key, sizeof (char) * (new_len + 1));
      strcat (t->key, kid->key);
      free (kid->key);
      free (kid);
    }
  }
}

/* Helper for trie_insert.
 */
static void trie_insert_split_key (
      trie* t
    , const char* key_prefix, const char* key_suffix
  ) {
  trie* kid = trie_new_init (key_suffix);
  trie_add_kid (t, kid);
  trie_set_key (t, key_prefix);
}

/* Helper for trie_insert.
 */
static void trie_insert_simple (trie* t, const char* key) {
  trie* kid = trie_new_init (key);
  trie_add_kid (t, kid);
}

/* Helper for trie_insert.
 */
static void trie_insert_fork (
      trie* t
    , trie* kid
    , char* key_prefix  /* Caller loses ownership of this string */
    , const char* key_suffix
    , const char* kid_key_suffix
  ) {
  trie* fork_kid = trie_new ();
  fork_kid->key = key_prefix;
  fork_kid->kids = trie_new_init (key_suffix);
  fork_kid->kids->next = kid;
  trie_replace_kid (t, kid, fork_kid);
  fork_kid->next = kid->next;
  kid->next = NULL;
  trie_set_key (kid, kid_key_suffix);
}

/* Inserts |key| into the trie |t|.
 */
void trie_insert (trie* t, const char* key) {
  if (key [0] == '\0') {
    return;
  }
  while (1) {
    trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
    trie* kid = pair.trie;
    const char* stripped = pair.str;
    if (kid != NULL) {
      if (stripped [0] == '\0') {
        kid->value = 1;
        return;
      }
      t = kid;
      key = stripped;
      continue;
    }
    pair = lookup_by (strip_prefix_from_key, key, t->kids);
    kid = pair.trie;
    stripped = pair.str;
    if (kid != NULL) {
      trie_insert_split_key (kid, key, stripped);
      return;
    }
    pair = lookup_by (strip_common_prefix_on_key, key, t->kids);
    kid = pair.trie;
    stripped = pair.str;
    if (kid == NULL) {
      trie_insert_simple (t, key);
      return;
    }
    int prefix_len = stripped - key;
    char* common_prefix = (char*) malloc (sizeof (char) * (prefix_len + 1));
    strncpy (common_prefix, key, prefix_len);
    common_prefix [prefix_len] = '\0';
    trie_insert_fork (t, kid, common_prefix, stripped, kid->key + prefix_len);
    return;
  }
}

/* Helper for trie_remove.
 */
static void trie_remove_simple (trie* t, trie* kid) {
  trie_remove_kid (t, kid);
  free (kid->key);
  free (kid);
}

/* Helper for trie_remove.
 */
static void trie_remove_merge (trie* t) {
  t->value = 0;
  trie_try_merge_with_kids (t);
}

/* Removes |key| from the trie |t|.
 */
void trie_remove (trie* t, const char* key) {
  trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
  trie* kid = pair.trie;
  const char* stripped = pair.str;
  if (kid == NULL) {
    return;
  }
  if (stripped [0] == '\0') {
    if (kid->kids == NULL) {
      trie_remove_simple (t, kid);
    }
    else {
      trie_remove_merge (kid);
    }
  }
  else {
    trie_remove (kid, stripped);
  }
  trie_try_merge_with_kids (t);
}

我怎样能从这个实现中删除一个单词...... 另外,如果可能的话,如果我想为压缩 Trie 写结构体...... 请告诉我您对此的想法.... 只需要压缩 Trie 的结构体..... - AGeek
添加了一个删除函数。但我不打算编写压缩函数。 - Thomas Eding
谢谢,先生......您非常有帮助......您的代码给了我对 Trie 和相关结构的深刻洞见......因为我一直在独自工作,所以有些受用。谢谢...此外,我现在编写了一段代码,它给了我一个压缩的 Trie。文章http://www.heppenstall.ca/academics/doc/242/F2001Archives/Ch11_3_tries.pdf 现在,我遵循的想法是先构建一个简单的Trie,然后再尝试压缩Trie。代码附在问题中.... - AGeek
好吧,我有点无聊,所以我要试着做一下Trie压缩。 - Thomas Eding
为什么你在这些地方使用了static和const关键字......请告诉我背后的想法,如果它能使某些东西更好地工作,那么我也想将其纳入我的代码中。再次感谢您能够解决问题。谢谢先生。 - AGeek
我更新了我的代码。我进行了一些测试,它似乎可以工作。在函数前面放置 static 会使函数在文件外部隐藏。有关在函数中使用 static,请参阅 http://en.wikipedia.org/wiki/Local_variable#Static_local_variables。至于 const,我建议谷歌一下。 - Thomas Eding

-1

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接