程式人生 > [Trie樹] 統計英文文字中單詞出現的個數 - C語言實現 - 考慮數字、英文

[Trie樹] 統計英文文字中單詞出現的個數 - C語言實現 - 考慮數字、英文

【英文文字】

   However, after reaching the shore there are plenty of challenges waiting for him."The biggest challenge now is learning to walk again! My biggest fear when I was coming out of the water and back onto the beach was that I was going to fall over. As I’ve not stepped foot on land for over five months, the tendons and ligaments in my feet have been asleep, so I basically have to learn to walk again. 
  Ross is not new to extreme challenges. He accomplished the world's longest rope climb in less than 24 hours by climbing the height equal of Mount Everest-8,848m (29,029 feet)
  He also attempted to swim 100km in the Caribbean carrying a 100lb tree.
On his Twitter account, Ross shared what his body looked like after this swim.

【結果】
在這裡插入圖片描述

【程式碼】

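/*
 *@Time:20181111
 *@Test:Count how many times each word occurs in an English text
 *@Desc:A word is a run of letters and digits; letters are lowercased before counting
 */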
 
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
 
#define MAXSTRLEN 1024          /* longest word, in characters, that is stored and printed */

/*
 * Child-slot layout of a trie node: digits '0'-'9' map to slots 0-9 and
 * letters 'a'-'z' map to slots 10-35.  The array keeps its original size of
 * 40 entries; slots 36-39 are never populated.
 */
#define TRIE_SLOT_COUNT  40
#define TRIE_USED_SLOTS  36
#define TRIE_LETTER_BASE 10

typedef struct node {
	int cnt;                             /* how many times the word ending here was inserted */
	struct node *next[TRIE_SLOT_COUNT];  /* children, indexed by the slot layout above */
} TrieTreeNode, *TrieTree;

TrieTree createTrieTreeNode(void);                      /* allocate and initialise one node */
int InsertTrieTreeNode(TrieTree *pT, const char *str);  /* insert one word */
int DeleteTrieTreeNode(TrieTree *pT, const char *str);  /* remove one occurrence of a word */
int SearchTrieTree(TrieTree T, const char *str);        /* look up a word's count */
void TraverseTrieTree(TrieTree T);                      /* print every stored word */
void DestroyTrieTree(TrieTree *pT);                     /* free the whole tree */

/* Map a character to its child slot, or return -1 if it is not [0-9a-z]. */
static int trieSlotOf(char ch)
{
	if (ch >= '0' && ch <= '9') {
		return ch - '0';
	}
	if (ch >= 'a' && ch <= 'z') {
		return ch - 'a' + TRIE_LETTER_BASE;
	}
	return -1;
}

/* Inverse of trieSlotOf for slots 0..TRIE_USED_SLOTS-1. */
static char trieCharOf(int slot)
{
	if (slot < TRIE_LETTER_BASE) {
		return (char)('0' + slot);
	}
	return (char)('a' + (slot - TRIE_LETTER_BASE));
}

/*
 * Walk from `node` along `str` and return the node the path ends on, or NULL
 * if the path does not exist.  An unsupported character reached while the
 * path still exists prints the error message and also yields NULL.
 */
static TrieTreeNode *findTrieTreeNode(TrieTreeNode *node, const char *str)
{
	for (; node != NULL && *str != '\0'; str++) {
		int slot = trieSlotOf(*str);

		if (slot < 0) {
			printf("單詞錯誤,有別的型別字元\n");
			return NULL;
		}
		node = node->next[slot];
	}
	return node;
}

/*
 * Allocate a node with a zero count and no children.
 * Terminates the program with a failure status if memory is exhausted.
 */
TrieTree createTrieTreeNode(void)
{
	TrieTreeNode *node = malloc(sizeof *node);
	int slot;

	if (node == NULL) {
		fprintf(stderr, "createTrieTreeNode: out of memory\n");
		exit(EXIT_FAILURE);
	}
	node->cnt = 0;
	for (slot = 0; slot < TRIE_SLOT_COUNT; slot++) {
		node->next[slot] = NULL;
	}
	return node;
}

/*
 * Insert one occurrence of `str` into the trie rooted at *pT (the root is a
 * header node that must already exist).
 *
 * The word is validated in full before any node is created, so a rejected
 * word leaves the tree untouched.  Empty words, NULL arguments and words
 * containing characters other than [0-9a-z] are ignored (the latter with an
 * error message).  Always returns 0, as the original implementation did.
 */
int InsertTrieTreeNode(TrieTree *pT, const char *str)
{
	TrieTreeNode *node;
	const char *p;

	if (pT == NULL || *pT == NULL || str == NULL || str[0] == '\0') {
		return 0;
	}

	for (p = str; *p != '\0'; p++) {
		if (trieSlotOf(*p) < 0) {
			printf("單詞錯誤,有別的型別字元\n");
			return 0;
		}
	}

	node = *pT;
	for (p = str; *p != '\0'; p++) {
		int slot = trieSlotOf(*p);

		if (node->next[slot] == NULL) {
			node->next[slot] = createTrieTreeNode();
		}
		node = node->next[slot];
	}
	node->cnt++;
	return 0;
}

/*
 * Look up `str` in the trie.
 * Returns the number of times it was inserted, or 0 if it is absent.
 */
int SearchTrieTree(TrieTree T, const char *str)
{
	TrieTreeNode *node;

	if (T == NULL || str == NULL) {
		return 0;
	}
	node = findTrieTreeNode(T, str);
	return node != NULL ? node->cnt : 0;
}

/*
 * Remove one occurrence of `str`.
 * Returns 0 if the word is not stored, otherwise the count that remains.
 * Nodes are not freed; only the counter is decremented.
 */
int DeleteTrieTreeNode(TrieTree *pT, const char *str)
{
	TrieTreeNode *node;

	if (pT == NULL || *pT == NULL || str == NULL) {
		return 0;
	}
	node = findTrieTreeNode(*pT, str);
	/* A path that exists only as a prefix of longer words has cnt == 0:
	 * treat it as "not present" instead of driving the count negative. */
	if (node == NULL || node->cnt <= 0) {
		return 0;
	}
	node->cnt--;
	return node->cnt;
}

/*
 * Depth-first print of every word below `node`.  `word[0..len)` holds the
 * characters on the path from the root to `node`.
 */
static void traverseTrieTreeFrom(const TrieTreeNode *node, char *word, size_t len)
{
	int slot;

	if (len >= MAXSTRLEN) {
		return;                 /* deeper words would not fit in the buffer */
	}
	for (slot = 0; slot < TRIE_USED_SLOTS; slot++) {
		const TrieTreeNode *child = node->next[slot];

		if (child == NULL) {
			continue;
		}
		word[len] = trieCharOf(slot);
		if (child->cnt > 0) {
			word[len + 1] = '\0';
			printf("%-20s %-8d\n", word, child->cnt);
		}
		traverseTrieTreeFrom(child, word, len + 1);
	}
}

/*
 * Print every stored word with its count, one per line, visiting child
 * slots in ascending order (digits before letters).  Words longer than
 * MAXSTRLEN characters are not printed.
 */
void TraverseTrieTree(TrieTree T)
{
	char word[MAXSTRLEN + 1];

	if (T == NULL) {
		return;
	}
	traverseTrieTreeFrom(T, word, 0);
}

/* Free every node of the tree and reset *pT to NULL. */
void DestroyTrieTree(TrieTree *pT)
{
	int slot;

	if (pT == NULL || *pT == NULL) {
		return;
	}
	for (slot = 0; slot < TRIE_SLOT_COUNT; slot++) {
		DestroyTrieTree(&(*pT)->next[slot]);
	}
	free(*pT);
	*pT = NULL;
}

/*
 * Command-line driver: reads "2013-8.txt", splits it into runs of letters
 * and digits (letters lowercased), counts each run in a trie and prints the
 * counts.  Define TRIE_NO_MAIN to compile the trie functions without the
 * driver, e.g. when linking them into a test harness.
 */
#ifndef TRIE_NO_MAIN
int main(void)
{
	char word[MAXSTRLEN + 1] = {'\0'};  /* word currently being read */
	size_t len = 0;
	int ch;
	TrieTree T = createTrieTreeNode();
	FILE *fp = fopen("2013-8.txt", "r");

	if (fp == NULL) {
		perror("2013-8.txt");
		DestroyTrieTree(&T);
		return EXIT_FAILURE;
	}

	while ((ch = fgetc(fp)) != EOF) {
		if (ch >= 'A' && ch <= 'Z') {
			ch = ch - 'A' + 'a';        /* fold to lowercase */
		}
		if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z')) {
			/* Characters beyond MAXSTRLEN are dropped rather than
			 * written past the end of the buffer. */
			if (len < MAXSTRLEN) {
				word[len++] = (char)ch;
			}
		} else if (len > 0) {
			word[len] = '\0';
			InsertTrieTreeNode(&T, word);
			len = 0;
		}
	}
	/* The file may end without a trailing separator: flush the last word. */
	if (len > 0) {
		word[len] = '\0';
		InsertTrieTreeNode(&T, word);
	}
	fclose(fp);

	TraverseTrieTree(T);
	DestroyTrieTree(&T);
	return 0;
}
#endif /* TRIE_NO_MAIN */