TA的每日心情 | 开心 2019-9-4 10:12 |
---|
签到天数: 1 天 [LV.1]初来乍到
|
本帖最后由 jiaqiangmit2 于 2019-8-2 21:30 编辑
各位大神:
我在 Anki 里已经实现了提取 LDOCE6 真人朗读例句的制卡。现在希望能将音频文件单独放到手机里,在睡觉和开车等时间听。问题来了:Anki 对媒体文件的命名是以类似于哈希(hash)的方式“乱码”命名的,从媒体文件夹里复制出来后,所有例句和单词的朗读顺序是打乱的。能否实现:单词音频以其拼写命名(example.mp3),对应的几个例句音频以单词拼写加后缀的方式命名?这样放在手机里,单词和对应例句就可以连在一起听了。

如图,文件名是“乱码”的。
无法上传附件,文件名是这样的:
[sound:-ldoce6-b0ab2fba-11b6ffb4-d84db123-b00153f3-9971952a.mp3]

对应的脚本如下:

------------------------------

#-*- coding:utf-8 -*-: M4 M: s: N+ z8 h- Y3 F
import os
) W( \2 B0 L1 ^/ X( Z# ?( c" dimport re
6 k8 Y3 c9 l( ^8 s! z- W |, S+ s! oimport random# y0 p- n5 p; I
from ..base import *6 F1 X/ q J) r+ w- N$ C
: A1 I. L: N% `1 f, G3 E+ s1 F, F
# Regex templates for the pronunciation links found in the dictionary entry
# HTML.  Group 1 captures the mp3 path that follows "sound://"; the %s slots
# are filled in when the patterns are compiled in MAPPINGS.
VOICE_PATTERN = r'<a href="sound://([\w/]+\w*\.mp3)"><img src="img/spkr_%s.png"></a>'
VOICE_PATTERN_WQ = r'<span class="%s"><a href="sound://([\w/]+\w*\.mp3)">(.*?)</span %s>'

# [voice key, [compiled patterns, tried in this order]] pairs.
MAPPINGS = [
    ['br', [re.compile(VOICE_PATTERN % r'r'),
            re.compile(VOICE_PATTERN_WQ % (r'brevoice', r'brevoice'))]],
    ['us', [re.compile(VOICE_PATTERN % r'b'),
            re.compile(VOICE_PATTERN_WQ % (r'amevoice', r'amevoice'))]],
]

# Voice key ('br' / 'us') -> list of compiled patterns.
LANG_TO_REGEXPS = {lang: regexps for lang, regexps in MAPPINGS}

# Explicit location of the dictionary file.  When this is empty the service
# falls back to auto-detecting a loaded dictionary (see Ldoce6.__init__).
# Backslashes are escaped explicitly: '\A', '\D' and '\L' are not valid string
# escapes and only worked by accident (newer Python versions warn about them).
DICT_PATH = u'E:\\Anki\\Dick\\L6mp3.mdx'


@register([u'本地词典-LDOCE6', u'MDX-LDOCE6'])
class Ldoce6(MdxService):
    """Field exporters for an LDOCE6 MDX dictionary.

    Every ``fld_*`` method reads the entry HTML via ``self.get_html()`` and
    extracts one card field from it with regular expressions.  Media files
    referenced by the entry are stored through ``self.save_file`` under a name
    produced by ``get_hex_name``.
    """

    # Matches the whole speaker link inside an example so it can be removed.
    _SOUND_LINK = r'<a[^>]+?href=\"sound\:.*\.mp3\".*</a>'
    # Same link, but capturing the mp3 path that follows "sound:/".
    _SOUND_PATH = r'<a[^>]+?href=\"sound\:\/(.*?\.mp3)\".*</a>'
    # "All examples" historically means the first 100 positions.
    _ALL_EXAMPLES = frozenset(range(100))

    def __init__(self):
        """Open DICT_PATH, or the first loaded dictionary titled 'LDOCE6...'."""
        dict_path = DICT_PATH
        # if DICT_PATH is a path, stop auto detect
        if not dict_path:
            from ...service import service_manager, service_pool
            for clazz in service_manager.mdx_services:
                service = service_pool.get(clazz.__unique__)
                title = service.builder._title if service and service.support else u''
                service_pool.put(service)
                if title.startswith(u'LDOCE6'):
                    dict_path = service.dict_path
                    break
        super(Ldoce6, self).__init__(dict_path)

    @property
    def title(self):
        """Return the registered label, falling back to ``self.unique``."""
        return getattr(self, '__register_label__', self.unique)

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    def _span_contents(self, css_class):
        """Return the inner HTML of each ``<span class="css_class">`` element.

        Only the first regex hit is parsed.  The pattern is greedy, so on a
        single line it runs from the first matching span to the last
        ``</span>``.  Returns an empty list when nothing is found.
        """
        pattern = r'<span class="%s"\s*.*>\s*.*<\/span>' % css_class
        found = re.findall(pattern, self.get_html())
        if not found:
            return []
        soup = parse_html(found[0])
        return [u''.join(str(content) for content in element.contents)
                for element in soup.findAll('span', {'class': css_class})]

    @staticmethod
    def _pick_indexes(range_arr, count):
        """Return the positions to export out of ``count`` candidates.

        A non-empty ``range_arr`` is used as given.  Otherwise one random
        position is drawn from the full ``0 .. count - 1`` range; with no
        candidates the result is empty instead of raising ``ValueError``.
        """
        if range_arr:
            return range_arr
        if count <= 0:
            return []
        return [random.randrange(count)]

    def _fld_voice(self, html, voice):
        """Return the pronunciation audio field for ``voice`` ('br' or 'us').

        The patterns registered for the voice are tried in order; the first
        one whose file is saved successfully wins.  Returns '' otherwise.
        """
        for regexp in LANG_TO_REGEXPS[voice]:
            match = regexp.search(html)
            if not match:
                continue
            val = '/' + match.group(1)
            name = get_hex_name('mdx-' + self.unique.lower(), val, 'mp3')
            name = self.save_file(val, name)
            if name:
                return self.get_anki_label(name, 'audio')
        return ''

    def _fld_image(self, img):
        """Save the image resource ``img`` and return its label, or ''."""
        val = '/' + img
        # file extension isn't always jpg
        file_extension = os.path.splitext(img)[1][1:].strip().lower()
        name = get_hex_name('mdx-' + self.unique.lower(), val, file_extension)
        name = self.save_file(val, name)
        if name:
            return self.get_anki_label(name, 'img')
        return ''

    def _fld_audio(self, audio):
        """Save the mp3 resource ``audio`` and return its label, or ''."""
        name = get_hex_name('mdx-' + self.unique.lower(), audio, 'mp3')
        name = self.save_file(audio, name)
        if name:
            return self.get_anki_label(name, 'audio')
        return ''

    def _range_sentence(self, range_arr=None):
        """Return the selected example sentences as ``<li>`` items.

        ``range_arr`` holds the example positions to keep; when it is empty
        or None a single random example is chosen.  Speaker links are
        stripped from the text.  Returns '' when the entry has no examples.
        """
        examples = self._span_contents('example')
        if not examples:
            return ''
        wanted = self._pick_indexes(range_arr, len(examples))
        items = []
        for i, text in enumerate(examples):
            if i in wanted:
                text = re.sub(self._SOUND_LINK, '', text).strip()
                items.append('<li>' + text + '</li>')
        return self._css(''.join(items))

    def _range_sentence_audio(self, range_arr=None):
        """Return the selected examples, each followed by its audio label.

        Only examples that carry a sound link are considered, and positions
        in ``range_arr`` index into that filtered list.  When ``range_arr``
        is empty or None a single random example is chosen.  Returns '' when
        the entry has no examples.
        """
        examples = self._span_contents('example')
        if not examples:
            return ''
        with_audio = []
        for text in examples:
            sound = re.search(self._SOUND_PATH, text)
            if sound:
                with_audio.append((sound.group(1), text))
        wanted = self._pick_indexes(range_arr, len(with_audio))
        items = []
        for i, (audio_path, text) in enumerate(with_audio):
            if i in wanted:
                # Save the audio only for the examples actually exported.
                mp3 = self._fld_audio(audio_path)
                text = re.sub(self._SOUND_LINK, '', text).strip()
                items.append('<li>' + text + ' ' + mp3 + '</li>')
        return self._css(''.join(items))

    # ------------------------------------------------------------------
    # exported fields
    # ------------------------------------------------------------------
    @export('PHON')
    def fld_phonetic(self):
        """Return the content of the first ``<span class="pron">``, or ''."""
        m = re.search(r'<span class="pron">(.*?)</span>', self.get_html())
        return m.group(1) if m else ''

    @export('BRE_PRON')
    def fld_voicebre(self):
        """Return the audio field matched by the 'br' patterns."""
        return self._fld_voice(self.get_html(), 'br')

    @export('AME_PRON')
    def fld_voiceame(self):
        """Return the audio field matched by the 'us' patterns."""
        return self._fld_voice(self.get_html(), 'us')

    @export('IMAGE')
    def fld_image(self):
        """Return the image field for the first ``imgholder`` span, or ''."""
        m = re.search(r'<span class="imgholder"><img src="(.*?)".*?></span>',
                      self.get_html())
        return self._fld_image(m.group(1)) if m else ''

    @export('EXAMPLE')
    def fld_sentence(self):
        """Return all example sentences (first 100 positions), without audio."""
        return self._range_sentence(self._ALL_EXAMPLES)

    @export([u'例句加音频', u'Examples with audios'])
    def fld_sentence_audio(self):
        """Return all examples that have audio (first 100 positions)."""
        return self._range_sentence_audio(self._ALL_EXAMPLES)

    @export('DEF')
    def fld_definate(self):
        """Return every definition as ``<li>`` items, or '' when none exist."""
        definitions = self._span_contents('def')
        if not definitions:
            return ''
        return self._css(''.join('<li>' + text + '</li>' for text in definitions))

    @export([u'随机例句', u'Random example'])
    def fld_random_sentence(self):
        """Return one randomly chosen example sentence."""
        return self._range_sentence()

    @export([u'首2个例句', u'First 2 examples'])
    def fld_first2_sentence(self):
        """Return the first two example sentences."""
        return self._range_sentence([0, 1])

    @export([u'随机例句加音频', u'Random example with audio'])
    def fld_random_sentence_audio(self):
        """Return one randomly chosen example together with its audio."""
        return self._range_sentence_audio()

    @export([u'首2个例句加音频', u'First 2 examples with audios'])
    def fld_first2_sentence_audio(self):
        """Return the first two examples that have audio."""
        return self._range_sentence_audio([0, 1])

    @export([u'额外例句', u'Extra Examples'])
    def fld_extra_examples(self):
        """Return the content of the ``@examples_*`` entries linked from here.

        Each linked key is looked up with ``self.builder.mdx_lookup``; CRLF
        sequences and the "entry:/" prefix are removed from the results.
        Returns '' when the entry links to no extra examples.
        """
        keys = re.findall(r'href="/(@examples_.*?)\">.*?<', self.get_html())
        if not keys:
            return ''
        chunks = []
        for key in keys:
            for chunk in self.builder.mdx_lookup(key):
                chunks.append(chunk.replace("\r\n", "").replace("entry:/", ""))
        return self._css(u''.join(chunks))

    @with_styles(cssfile='_ldoce6.css')
    def _css(self, val):
        """Return ``val`` unchanged; the decorator is given '_ldoce6.css'."""
        return val
---------------------
谢谢各位!

|
|