词向量嵌入的实例详解


当前第2页 返回上一页

import tensorflow as tf
import numpy as np
from helpers import lazy_property
class EmbeddingModel:
def __init__(self, data, target, params):
self.data = data
self.target = target
self.params = params
self.embeddings
self.cost
self.optimize
@lazy_property
def embeddings(self):
initial = tf.random_uniform(
[self.params.vocabulary_size, self.params.embedding_size],
-1.0, 1.0)
return tf.Variable(initial)
@lazy_property
def optimize(self):
optimizer = tf.train.MomentumOptimizer(
self.params.learning_rate, self.params.momentum)
return optimizer.minimize(self.cost)
@lazy_property
def cost(self):
embedded = tf.nn.embedding_lookup(self.embeddings, self.data)
weight = tf.Variable(tf.truncated_normal(
[self.params.vocabulary_size, self.params.embedding_size],
stddev=1.0 / self.params.embedding_size ** 0.5))
bias = tf.Variable(tf.zeros([self.params.vocabulary_size]))
target = tf.expand_dims(self.target, 1)
return tf.reduce_mean(tf.nn.nce_loss(
weight, bias, embedded, target,
self.params.contrastive_examples,
self.params.vocabulary_size))

import collections
import tensorflow as tf
import numpy as np
from batched import batched
from EmbeddingModel import EmbeddingModel
from skipgrams import skipgrams
from Wikipedia import Wikipedia
from helpers import AttrDict
WIKI_DOWNLOAD_DIR = './wikipedia'
params = AttrDict(
vocabulary_size=10000,
max_context=10,
embedding_size=200,
contrastive_examples=100,
learning_rate=0.5,
momentum=0.5,
batch_size=1000,
)
data = tf.placeholder(tf.int32, [None])
target = tf.placeholder(tf.int32, [None])
model = EmbeddingModel(data, target, params)
corpus = Wikipedia(
'https://dumps.wikimedia.org/enwiki/20160501/'
'enwiki-20160501-pages-meta-current1.xml-p000000010p000030303.bz2',
WIKI_DOWNLOAD_DIR,
params.vocabulary_size)
examples = skipgrams(corpus, params.max_context)
batches = batched(examples, params.batch_size)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
average = collections.deque(maxlen=100)
for index, batch in enumerate(batches):
feed_dict = {data: batch[0], target: batch[1]}
cost, _ = sess.run([model.cost, model.optimize], feed_dict)
average.append(cost)
print('{}: {:5.1f}'.format(index + 1, sum(average) / len(average)))
if index > 100000:
break
embeddings = sess.run(model.embeddings)
np.save(WIKI_DOWNLOAD_DIR + '/embeddings.npy', embeddings)

以上就是词向量嵌入的实例详解的详细内容,更多文章请关注木庄网络博客!!

返回前面的内容

相关阅读 >>

32位的电脑怎么下载Python

geany中怎么配置Python

如何用Python画烟花

Python程序死循环怎么停止

Python下载好了怎么打开

简诉Python re模块中re.search和re.match的区别

Python编程语言是什么

vscode无法调试Python怎么解决

Python变量类型-如何给单个或者多个变量进行赋值(实例解析)

Python条件语句是什么?条件语句的一般格式是什么样的?

更多相关阅读请进入《Python》频道 >>




打赏

取消

感谢您的支持,我会继续努力的!

扫码支持
扫码打赏,您说多少就多少

打开支付宝扫一扫,即可进行扫码打赏哦

分享从这里开始,精彩与您同在

评论

管理员已关闭评论功能...