
tensorflow17 "TensorFlow實戰Google深度學習框架" notes 08-02: Implementing a language model with a recurrent neural network (code)

00 reader.py

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==============================================================================
"""Utilities for parsing PTB text files.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import os import tensorflow as tf def _read_words(filename): with tf.gfile.GFile(filename, "r") as f: return f.read().decode("utf-8"
).replace("\n", "<eos>").split() def _build_vocab(filename): data = _read_words(filename) counter = collections.Counter(data) count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) words, _ = list(zip(*count_pairs)) word_to_id = dict(zip(words, range(len(words)))) return word_to_id def _file_to_word_ids(filename, word_to_id): data = _read_words(filename) return [word_to_id[word] for word in data if word in word_to_id] def ptb_raw_data(data_path=None): """Load PTB raw data from data directory "data_path". Reads PTB text files, converts strings to integer ids, and performs mini-batching of the inputs. The PTB dataset comes from Tomas Mikolov's webpage: http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz Args: data_path: string path to the directory where simple-examples.tgz has been extracted. Returns: tuple (train_data, valid_data, test_data, vocabulary) where each of the data objects can be passed to PTBIterator. """ train_path = os.path.join(data_path, "ptb.train.txt") valid_path = os.path.join(data_path, "ptb.valid.txt") test_path = os.path.join(data_path, "ptb.test.txt") word_to_id = _build_vocab(train_path) train_data = _file_to_word_ids(train_path, word_to_id) valid_data = _file_to_word_ids(valid_path, word_to_id) test_data = _file_to_word_ids(test_path, word_to_id) vocabulary = len(word_to_id) return train_data, valid_data, test_data, vocabulary def ptb_producer(raw_data, batch_size, num_steps, name=None): """Iterate on the raw PTB data. This chunks up raw_data into batches of examples and returns Tensors that are drawn from these batches. Args: raw_data: one of the raw data outputs from ptb_raw_data. batch_size: int, the batch size. num_steps: int, the number of unrolls. name: the name of this operation (optional). Returns: A pair of Tensors, each shaped [batch_size, num_steps]. The second element of the tuple is the same data time-shifted to the right by one. Raises: tf.errors.InvalidArgumentError: if batch_size or num_steps are too high. """ with tf.name_scope(name, "PTBProducer", [raw_data, batch_size, num_steps]): raw_data = tf.convert_to_tensor(raw_data, name="raw_data", dtype=tf.int32) data_len = tf.size(raw_data) batch_len = data_len // batch_size data = tf.reshape(raw_data[0 : batch_size * batch_len], [batch_size, batch_len]) epoch_size = (batch_len - 1) // num_steps assertion = tf.assert_positive( epoch_size, message="epoch_size == 0, decrease batch_size or num_steps") with tf.control_dependencies([assertion]): epoch_size = tf.identity(epoch_size, name="epoch_size") i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue() x = tf.strided_slice(data, [0, i * num_steps], [batch_size, (i + 1) * num_steps]) x.set_shape([batch_size, num_steps]) y = tf.strided_slice(data, [0, i * num_steps + 1], [batch_size, (i + 1) * num_steps + 1]) y.set_shape([batch_size, num_steps]) return x, y
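
To make the chunking in ptb_producer concrete, here is a minimal NumPy-only sketch (not part of the book's code) of the same logic: the word-id sequence is split into batch_size parallel streams, and each x window is paired with a y window shifted right by one word.

import numpy as np

# Toy corpus of word ids 0..19, mirroring the reshape/slice steps of ptb_producer.
raw_data = np.arange(20)
batch_size, num_steps = 2, 3

batch_len = len(raw_data) // batch_size                        # 10
data = raw_data[:batch_size * batch_len].reshape(batch_size, batch_len)
epoch_size = (batch_len - 1) // num_steps                      # 3 (x, y) pairs per epoch

for i in range(epoch_size):
    x = data[:, i * num_steps:(i + 1) * num_steps]             # [batch_size, num_steps]
    y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]     # x shifted right by one word
    print(x, y)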

01 Introduction to the PTB dataset

# "TensorFlow實戰Google深度學習框架" Chapter 08: Recurrent Neural Networks
# win10 Tensorflow1.0.1 python3.5.3
# CUDA v8.0 cudnn-8.0-windows10-x64-v5.1
# filename: ts08.02.py  # Introduction to the PTB dataset

import tensorflow as tf
import reader

# 1. Read the data and print its length and the first 100 word ids (requires the PTB_data directory)
DATA_PATH = "../../datasets/PTB_data"
train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)
print(len(train_data))
print(train_data[:100])
'''
929589
[9970, 9971, 9972, 9974, 9975, 9976, 9980, 9981, 9982, 9983, 9984, 9986, 9987, 9988, 9989, 9991, 9992, 9993, 9994, 9995, 9996, 9997, 9998, 9999, 2, 9256, 1, 3, 72, 393, 33, 2133, 0, 146, 19, 6, 9207, 276, 407, 3, 2, 23, 1, 13, 141, 4, 1, 5465, 0, 3081, 1596, 96, 2, 7682, 1, 3, 72, 393, 8, 337, 141, 4, 2477, 657, 2170, 955, 24, 521, 6, 9207, 276, 4, 39, 303, 438, 3684, 2, 6, 942, 4, 3150, 496, 263, 5, 138, 6092, 4241, 6036, 30, 988, 6, 241, 760, 4, 1015, 2786, 211, 6, 96, 4]
'''
# 2. Organize the training data into batches of size 4 with truncation length 5,
# and read the first 3 batches through a queue.
# ptb_producer returns a tuple of two [batch_size, num_steps] tensors.
result = reader.ptb_producer(train_data, 4, 5)

# Read batches one by one through the queue.
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(3):
        x, y = sess.run(result)
        print("X%d: "%i, x)
        print("Y%d: "%i, y)
    coord.request_stop()
    coord.join(threads)
'''
X0:  [[9970 9971 9972 9974 9975]
 [ 332 7147  328 1452 8595]
 [1969    0   98   89 2254]
 [   3    3    2   14   24]]
Y0:  [[9971 9972 9974 9975 9976]
 [7147  328 1452 8595   59]
 [   0   98   89 2254    0]
 [   3    2   14   24  198]]
X1:  [[9976 9980 9981 9982 9983]
 [  59 1569  105 2231    1]
 [   0  312 1641    4 1063]
 [ 198  150 2262   10    0]]
Y1:  [[9980 9981 9982 9983 9984]
 [1569  105 2231    1  895]
 [ 312 1641    4 1063    8]
 [ 150 2262   10    0  507]]
X2:  [[9984 9986 9987 9988 9989]
 [ 895    1 5574    4  618]
 [   8  713    0  264  820]
 [ 507   74 2619    0    1]]
Y2:  [[9986 9987 9988 9989 9991]
 [   1 5574    4  618    2]
 [ 713    0  264  820    2]
 [  74 2619    0    1    8]]
'''

02 Implementing a language model with a recurrent neural network

# "TensorFlow實戰Google深度學習框架" Chapter 08: Recurrent Neural Networks
# win10 Tensorflow1.0.1 python3.5.3
# CUDA v8.0 cudnn-8.0-windows10-x64-v5.1
# filename: ts08.03.py  # Implementing a language model with a recurrent neural network

import numpy as np
import tensorflow as tf
import reader

# 1. Define the relevant hyperparameters
DATA_PATH = "../../datasets/PTB_data"
HIDDEN_SIZE = 200       # size of the LSTM hidden state
NUM_LAYERS = 2          # number of stacked LSTM layers
VOCAB_SIZE = 10000      # vocabulary size

LEARNING_RATE = 1.0     # learning rate
TRAIN_BATCH_SIZE = 20   # training batch size
TRAIN_NUM_STEP = 35     # truncation length during training

EVAL_BATCH_SIZE = 1     # evaluation batch size
EVAL_NUM_STEP = 1       # truncation length during evaluation
NUM_EPOCH = 2           # number of passes over the training data
KEEP_PROB = 0.5         # probability of keeping a node (dropout)
MAX_GRAD_NORM = 5       # cap on the global gradient norm

# 2. Define a class that describes the model structure
class PTBModel(object):
    def __init__(self, is_training, batch_size, num_steps):

        self.batch_size = batch_size
        self.num_steps = num_steps

# Define the input layer.
        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])

# Use LSTM cells, with dropout applied during training.
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(HIDDEN_SIZE)
        if is_training:
            lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=KEEP_PROB)
        cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * NUM_LAYERS)

# Initialize the initial state.
        self.initial_state = cell.zero_state(batch_size, tf.float32)
        embedding = tf.get_variable("embedding", [VOCAB_SIZE, HIDDEN_SIZE])

# Convert the word ids into word embeddings.
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        if is_training:
            inputs = tf.nn.dropout(inputs, KEEP_PROB)

# Define the list of outputs.
        outputs = []
        state = self.initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                cell_output, state = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)
        output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])
        weight = tf.get_variable("weight", [HIDDEN_SIZE, VOCAB_SIZE])
        bias = tf.get_variable("bias", [VOCAB_SIZE])
        logits = tf.matmul(output, weight) + bias

# Define the cross-entropy loss and the average loss.
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=tf.float32)])
        self.cost = tf.reduce_sum(loss) / batch_size
        self.final_state = state

# Define the backpropagation operation only when training.
        if not is_training: return
        trainable_variables = tf.trainable_variables()

# Clip the gradients, then define the optimizer and the training step.
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, trainable_variables), MAX_GRAD_NORM)
        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        self.train_op = optimizer.apply_gradients(zip(grads, trainable_variables))
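
For reference, a shape-only sketch of the forward pass above (a toy NumPy stand-in, not the book's code; the LSTM cell is replaced by an identity mapping just to trace the tensor shapes):

import numpy as np

# Toy sizes mirroring batch_size=4, num_steps=5, HIDDEN_SIZE=200, VOCAB_SIZE=10000.
batch_size, num_steps, hidden, vocab = 4, 5, 200, 10000
ids = np.zeros((batch_size, num_steps), dtype=np.int32)           # self.input_data
embedding = np.zeros((vocab, hidden), dtype=np.float32)
inputs = embedding[ids]                                           # embedding_lookup -> (4, 5, 200)
outputs = [inputs[:, t, :] for t in range(num_steps)]             # one (4, 200) cell output per step
output = np.concatenate(outputs, axis=1).reshape(-1, hidden)      # concat + reshape -> (20, 200)
logits = output.dot(np.zeros((hidden, vocab))) + np.zeros(vocab)  # weight/bias -> (20, 10000)
print(inputs.shape, output.shape, logits.shape)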

# 3. Run train_op on data with the given model and return the perplexity over the entire dataset
def run_epoch(session, model, data, train_op, output_log, epoch_size):
    total_costs = 0.0
    iters = 0
    state = session.run(model.initial_state)

# Train for one epoch.
    for step in range(epoch_size):
        x, y = session.run(data)
        cost, state, _ = session.run([model.cost, model.final_state, train_op],
                                        {model.input_data: x, model.targets: y, model.initial_state: state})
        total_costs += cost
        iters += model.num_steps

        if output_log and step % 100 == 0:
            print("After %d steps, perplexity is %.3f" % (step, np.exp(total_costs / iters)))
    return np.exp(total_costs / iters)
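
As a side note (my reading of the code, not text from the book): total_costs / iters is the average cross-entropy per word, so the value returned above is perplexity = exp(average per-word cross-entropy). A tiny numeric illustration:

import numpy as np

# Hypothetical numbers, for illustration only: an average per-word loss of about
# 5.2 nats corresponds to a perplexity of roughly 181, i.e. the model is about as
# uncertain as a uniform choice over 181 words.
avg_per_word_cross_entropy = 5.2
print(np.exp(avg_per_word_cross_entropy))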

# 4. Define the main function and run it
def main():
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)

# Compute the number of training steps in one epoch
    train_data_len = len(train_data)
    train_batch_len = train_data_len // TRAIN_BATCH_SIZE
    train_epoch_size = (train_batch_len - 1) // TRAIN_NUM_STEP

    valid_data_len = len(valid_data)
    valid_batch_len = valid_data_len // EVAL_BATCH_SIZE
    valid_epoch_size = (valid_batch_len - 1) // EVAL_NUM_STEP

    test_data_len = len(test_data)
    test_batch_len = test_data_len // EVAL_BATCH_SIZE
    test_epoch_size = (test_batch_len - 1) // EVAL_NUM_STEP

    initializer = tf.random_uniform_initializer(-0.05, 0.05)
    with tf.variable_scope("language_model", reuse=None, initializer=initializer):
        train_model = PTBModel(True, TRAIN_BATCH_SIZE, TRAIN_NUM_STEP)

    with tf.variable_scope("language_model", reuse=True, initializer=initializer):
        eval_model = PTBModel(False, EVAL_BATCH_SIZE, EVAL_NUM_STEP)

# Train the model.
    with tf.Session() as session:
        tf.global_variables_initializer().run()

        train_queue = reader.ptb_producer(train_data, train_model.batch_size, train_model.num_steps)
        eval_queue = reader.ptb_producer(valid_data, eval_model.batch_size, eval_model.num_steps)
        test_queue = reader.ptb_producer(test_data, eval_model.batch_size, eval_model.num_steps)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)

        for i in range(NUM_EPOCH):
            print("In iteration: %d" % (i + 1))
            run_epoch(session, train_model, train_queue, train_model.train_op, True, train_epoch_size)

            valid_perplexity = run_epoch(session, eval_model, eval_queue, tf.no_op(), False, valid_epoch_size)
            print("Epoch: %d Validation Perplexity: %.3f" % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, eval_model, test_queue, tf.no_op(), False, test_epoch_size)
        print("Test Perplexity: %.3f" % test_perplexity)

        coord.request_stop()
        coord.join(threads)

if __name__ == "__main__":
    main()
'''
In iteration: 1
After 0 steps, perplexity is 10009.690
After 100 steps, perplexity is 1465.472
After 200 steps, perplexity is 1081.721
After 300 steps, perplexity is 900.029
After 400 steps, perplexity is 785.582
After 500 steps, perplexity is 707.795
After 600 steps, perplexity is 649.047
After 700 steps, perplexity is 598.087
After 800 steps, perplexity is 553.027
After 900 steps, perplexity is 518.264
After 1000 steps, perplexity is 491.357
After 1100 steps, perplexity is 465.917
After 1200 steps, perplexity is 444.835
After 1300 steps, perplexity is 425.917
Epoch: 1 Validation Perplexity: 238.228
In iteration: 2
After 0 steps, perplexity is 350.195
After 100 steps, perplexity is 243.940
After 200 steps, perplexity is 248.997
After 300 steps, perplexity is 249.440
After 400 steps, perplexity is 246.536
After 500 steps, perplexity is 243.698
After 600 steps, perplexity is 243.138
After 700 steps, perplexity is 240.505
After 800 steps, perplexity is 235.897
After 900 steps, perplexity is 233.252
After 1000 steps, perplexity is 231.533
After 1100 steps, perplexity is 228.082
After 1200 steps, perplexity is 225.515
After 1300 steps, perplexity is 222.819
Epoch: 2 Validation Perplexity: 181.821
Test Perplexity: 177.882
'''