TA的每日心情 | 开心 2019-9-4 10:12 |
---|
签到天数: 1 天 [LV.1]初来乍到
|
本帖最后由 jiaqiangmit2 于 2019-8-2 21:30 编辑
各位大神
我在anki里已经实现提取LDOCE6真人朗读例句的制卡。现在希望能将音频文件单独放手机,睡觉和开车等时间听。问题来了:anki对媒体文件的命名是以类似于hash的方式“乱码”命名的,从媒体文件夹里复制出来,所有的例句和单词的朗读是打乱的。能否实现:单词以其拼写命名(example.mp3),对应的几个例句音频以单词拼写加后缀的方式命名,这样,放在手机里,单词和对应例句就可以一起读了。

如图,文件名是“乱码”的
无法上传附件,名字是这样的:
[sound:-ldoce6-b0ab2fba-11b6ffb4-d84db123-b00153f3-9971952a.mp3]

对应脚本

------------------------------

#-*- coding:utf-8 -*-
import os
import random
import re

from ..base import *

# Markup of a pronunciation link rendered as a speaker icon; ``%s`` is the
# icon suffix (``r`` is mapped to 'br' and ``b`` to 'us' in MAPPINGS below).
# Group 1 captures the mp3 path that follows ``sound://``.
VOICE_PATTERN = r'<a href="sound://([\w/]+\w*\.mp3)"><img src="img/spkr_%s.png"></a>'
# Alternative markup where the link sits inside a named span; both ``%s``
# placeholders receive the span class name. Group 1 is again the mp3 path.
VOICE_PATTERN_WQ = r'<span class="%s"><a href="sound://([\w/]+\w*\.mp3)">(.*?)</span %s>'
# Accent key -> compiled patterns, listed in the order they are tried.
MAPPINGS = [
    ['br', [re.compile(VOICE_PATTERN % r'r'),
            re.compile(VOICE_PATTERN_WQ % (r'brevoice', r'brevoice'))]],
    ['us', [re.compile(VOICE_PATTERN % r'b'),
            re.compile(VOICE_PATTERN_WQ % (r'amevoice', r'amevoice'))]],
]
LANG_TO_REGEXPS = {lang: regexps for lang, regexps in MAPPINGS}
# Explicit dictionary location. Backslashes are escaped so the literal does
# not rely on invalid escape sequences (\A, \D, \L); the runtime value is
# unchanged. An empty value makes Ldoce6.__init__ search the registered
# services instead.
DICT_PATH = u'E:\\Anki\\Dick\\L6mp3.mdx'

@register([u'本地词典-LDOCE6', u'MDX-LDOCE6'])
class Ldoce6(MdxService):
    """MDX-backed service exporting LDOCE6 entry fields.

    Exported fields cover the phonetic transcription, British/American
    pronunciation audio, the entry image, definitions, and example
    sentences (with or without their audio).
    """

    def __init__(self):
        """Resolve the dictionary path and initialise the MDX service.

        ``DICT_PATH`` wins when it is non-empty. Otherwise the registered
        MDX services are scanned and the first one whose builder title
        starts with ``LDOCE6`` supplies the path.
        """
        dict_path = DICT_PATH
        # if DICT_PATH is a path, stop auto detect
        if not dict_path:
            from ...service import service_manager, service_pool
            for clazz in service_manager.mdx_services:
                service = service_pool.get(clazz.__unique__)
                title = service.builder._title if service and service.support else u''
                # Hand the service back to the pool before inspecting the title.
                service_pool.put(service)
                if title.startswith(u'LDOCE6'):
                    dict_path = service.dict_path
                    break
        super(Ldoce6, self).__init__(dict_path)

    @property
    def title(self):
        """Return the registered label, falling back to ``self.unique``."""
        return getattr(self, '__register_label__', self.unique)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _span_contents(self, css_class):
        """Return the inner HTML of every ``<span class="css_class">``.

        Only the first regex hit in the entry HTML is parsed. An empty
        list is returned when nothing matches or the parsed snippet
        contains no such span.
        """
        pattern = r'<span class="%s"\s*.*>\s*.*<\/span>' % css_class
        found = re.findall(pattern, self.get_html())
        if not found:
            return []
        soup = parse_html(found[0])
        return [u''.join(str(content) for content in element.contents)
                for element in soup.findAll('span', {'class': css_class})]

    @staticmethod
    def _choose_indexes(range_arr, count):
        """Return the indexes to keep out of ``count`` candidates.

        A non-empty ``range_arr`` is returned unchanged. Otherwise one
        random valid index is chosen; every index including the last one
        is eligible, and no candidates yields an empty selection instead
        of a ``ValueError`` from ``random.randrange``.
        """
        if range_arr:
            return range_arr
        if count < 1:
            return []
        return [random.randrange(count)]

    @staticmethod
    def _strip_sound_link(fragment):
        """Remove the ``sound:`` anchor from an example's HTML and trim it."""
        return re.sub(r'<a[^>]+?href=\"sound\:.*\.mp3\".*</a>', '', fragment).strip()

    # ------------------------------------------------------------------
    # phonetics and pronunciation audio
    # ------------------------------------------------------------------

    @export('PHON')
    def fld_phonetic(self):
        """Return the text of the first ``pron`` span, or ``''`` if absent."""
        m = re.search(r'<span class="pron">(.*?)</span>', self.get_html())
        return m.group(1) if m else ''

    def _fld_voice(self, html, voice):
        """Return the pronunciation audio field for one accent.

        ``voice`` is a key of ``LANG_TO_REGEXPS`` ('br' or 'us'). Each of
        its patterns is tried in order against ``html``; the first match
        whose file is saved successfully produces the audio label.
        Returns ``''`` when no pattern yields a saved file.
        """
        for regexp in LANG_TO_REGEXPS[voice]:
            match = regexp.search(html)
            if not match:
                continue
            val = '/' + match.group(1)
            name = get_hex_name('mdx-' + self.unique.lower(), val, 'mp3')
            name = self.save_file(val, name)
            if name:
                return self.get_anki_label(name, 'audio')
        return ''

    @export('BRE_PRON')
    def fld_voicebre(self):
        """Return the British pronunciation audio field."""
        return self._fld_voice(self.get_html(), 'br')

    @export('AME_PRON')
    def fld_voiceame(self):
        """Return the American pronunciation audio field."""
        return self._fld_voice(self.get_html(), 'us')

    # ------------------------------------------------------------------
    # image
    # ------------------------------------------------------------------

    def _fld_image(self, img):
        """Save the image resource ``img`` and return its image label.

        Returns ``''`` when ``save_file`` does not report a file name.
        """
        val = '/' + img
        # file extension isn't always jpg
        file_extension = os.path.splitext(img)[1][1:].strip().lower()
        name = get_hex_name('mdx-' + self.unique.lower(), val, file_extension)
        name = self.save_file(val, name)
        if name:
            return self.get_anki_label(name, 'img')
        return ''

    @export('IMAGE')
    def fld_image(self):
        """Return the label of the first ``imgholder`` image, or ``''``."""
        m = re.search(r'<span class="imgholder"><img src="(.*?)".*?></span>',
                      self.get_html())
        return self._fld_image(m.group(1)) if m else ''

    # ------------------------------------------------------------------
    # definitions
    # ------------------------------------------------------------------

    @export('DEF')
    def fld_definate(self):
        """Return all ``def`` spans as ``<li>`` items, or ``''`` if none."""
        definitions = self._span_contents('def')
        if not definitions:
            return ''
        return self._css(u''.join('<li>' + text + '</li>' for text in definitions))

    # ------------------------------------------------------------------
    # example sentences
    # ------------------------------------------------------------------

    @export('EXAMPLE')
    def fld_sentence(self):
        """Return the first 100 examples without their audio links."""
        return self._range_sentence(list(range(100)))

    def _fld_audio(self, audio):
        """Save the mp3 resource ``audio`` and return its audio label.

        Returns ``''`` when ``save_file`` does not report a file name.
        """
        name = get_hex_name('mdx-' + self.unique.lower(), audio, 'mp3')
        name = self.save_file(audio, name)
        if name:
            return self.get_anki_label(name, 'audio')
        return ''

    @export([u'例句加音频', u'Examples with audios'])
    def fld_sentence_audio(self):
        """Return the first 100 audio-bearing examples with their audio."""
        return self._range_sentence_audio(list(range(100)))

    @export([u'随机例句', u'Random example'])
    def fld_random_sentence(self):
        """Return one randomly chosen example without audio."""
        return self._range_sentence()

    @export([u'首2个例句', u'First 2 examples'])
    def fld_first2_sentence(self):
        """Return the first two examples without audio."""
        return self._range_sentence([0, 1])

    @export([u'随机例句加音频', u'Random example with audio'])
    def fld_random_sentence_audio(self):
        """Return one randomly chosen audio-bearing example with its audio."""
        return self._range_sentence_audio()

    @export([u'首2个例句加音频', u'First 2 examples with audios'])
    def fld_first2_sentence_audio(self):
        """Return the first two audio-bearing examples with their audio."""
        return self._range_sentence_audio([0, 1])

    def _range_sentence(self, range_arr=None):
        """Return selected examples as ``<li>`` items with audio links removed.

        ``range_arr`` holds the example indexes to keep; when it is empty
        or ``None`` a single random example is used. Returns ``''`` when
        the entry has no example spans.
        """
        examples = self._span_contents('example')
        if not examples:
            return ''
        wanted = self._choose_indexes(range_arr, len(examples))
        items = ['<li>' + self._strip_sound_link(text) + '</li>'
                 for index, text in enumerate(examples) if index in wanted]
        return self._css(u''.join(items))

    def _range_sentence_audio(self, range_arr=None):
        """Return selected examples as ``<li>`` items followed by their audio.

        Only examples containing a ``sound:`` mp3 link are candidates, and
        ``range_arr`` indexes into that filtered list. When ``range_arr``
        is empty or ``None`` a single random candidate is used. Returns
        ``''`` when the entry has no example spans.
        """
        examples = self._span_contents('example')
        if not examples:
            return ''
        candidates = []
        for text in examples:
            sound = re.search(r'<a[^>]+?href=\"sound\:\/(.*?\.mp3)\".*</a>', text)
            if sound:
                candidates.append((sound.group(1), text))
        wanted = self._choose_indexes(range_arr, len(candidates))
        items = []
        for index, (audio_path, text) in enumerate(candidates):
            if index in wanted:
                # Save the audio first, then drop the original anchor from the text.
                mp3 = self._fld_audio(audio_path)
                items.append('<li>' + self._strip_sound_link(text) + ' ' + mp3 + '</li>')
        return self._css(u''.join(items))

    @export([u'额外例句', u'Extra Examples'])
    def fld_extra_examples(self):
        """Return the concatenated content of all ``@examples_*`` sub-entries.

        Each linked key is looked up through ``self.builder.mdx_lookup``;
        CRLF pairs and ``entry:/`` prefixes are removed from the results.
        Returns ``''`` when the entry links to no extra examples.
        """
        keys = re.findall(r'href="/(@examples_.*?)\">.*?<', self.get_html())
        if not keys:
            return ''
        parts = []
        for key in keys:
            for chunk in self.builder.mdx_lookup(key):
                parts.append(chunk.replace("\r\n", "").replace("entry:/", ""))
        return self._css(u''.join(parts))

    @with_styles(cssfile='_ldoce6.css')
    def _css(self, val):
        """Return ``val`` unchanged; ``with_styles`` attaches ``_ldoce6.css``."""
        return val
---------------------

谢谢各位

|
|