TA的每日心情 | 开心 2019-9-4 10:12 |
---|
签到天数: 1 天 [LV.1]初来乍到
|
本帖最后由 jiaqiangmit2 于 2019-8-2 21:30 编辑
各位大神:
我在 Anki 里已经实现了提取 LDOCE6 真人朗读例句的制卡。现在希望能将音频文件单独放到手机上,在睡觉和开车等时间听。问题来了:Anki 对媒体文件是以类似哈希(hash)的方式“乱码”命名的,从媒体文件夹里复制出来后,所有例句和单词的朗读顺序都是打乱的。能否实现:单词音频以其拼写命名(example.mp3),对应的几个例句音频以单词拼写加后缀的方式命名?这样放在手机里,单词和对应例句就可以一起读了。

如图,文件名是“乱码”的。
无法上传附件,名字是这样的:
[sound:-ldoce6-b0ab2fba-11b6ffb4-d84db123-b00153f3-9971952a.mp3]

对应脚本:

------------------------------

#-*- coding:utf-8 -*-
import os
import random
import re

from ..base import *


# HTML templates for the pronunciation links found in an LDOCE6 entry.
# ``%s`` is filled in below with the speaker-icon suffix (VOICE_PATTERN) or
# with the span class name (VOICE_PATTERN_WQ); group 1 captures the mp3 path
# that follows ``sound://``.
VOICE_PATTERN = r'<a href="sound://([\w/]+\w*\.mp3)"><img src="img/spkr_%s.png"></a>'
VOICE_PATTERN_WQ = r'<span class="%s"><a href="sound://([\w/]+\w*\.mp3)">(.*?)</span %s>'

# [language key, [compiled patterns tried in order]].
MAPPINGS = [
    ['br', [re.compile(VOICE_PATTERN % r'r'),
            re.compile(VOICE_PATTERN_WQ % (r'brevoice', r'brevoice'))]],
    ['us', [re.compile(VOICE_PATTERN % r'b'),
            re.compile(VOICE_PATTERN_WQ % (r'amevoice', r'amevoice'))]],
]

# Lookup table: language key -> list of compiled patterns.
LANG_TO_REGEXPS = {lang: regexps for lang, regexps in MAPPINGS}

# Path of the LDOCE6 .mdx file. Backslashes are escaped explicitly: the
# original literal relied on unrecognised escapes (\A, \D, \L), which is
# deprecated. The resulting string value is unchanged.
DICT_PATH = u'E:\\Anki\\Dick\\L6mp3.mdx'

@register([u'本地词典-LDOCE6', u'MDX-LDOCE6'])
class Ldoce6(MdxService):
    """Service that exports fields extracted from an LDOCE6 entry's HTML.

    Every exported field works on the HTML returned by ``self.get_html()``.
    """

    # Template matching ``<span class="...">`` blocks; ``%s`` is the class name.
    _SPAN_TEMPLATE = r'<span class="%s"\s*.*>\s*.*<\/span>'
    # Sound link embedded in an example; used to strip the link from the text.
    _SOUND_LINK = r'<a[^>]+?href=\"sound\:.*\.mp3\".*</a>'
    # Same link, capturing the mp3 path that follows ``sound:/``.
    _SOUND_LINK_PATH = r'<a[^>]+?href=\"sound\:\/(.*?\.mp3)\".*</a>'

    def __init__(self):
        """Resolve the dictionary path and initialise the base service.

        ``DICT_PATH`` is used when it is non-empty. Otherwise the registered
        MDX services are scanned and the path of the first one whose title
        starts with ``LDOCE6`` is used.
        """
        dict_path = DICT_PATH
        # A configured DICT_PATH disables auto-detection.
        if not dict_path:
            from ...service import service_manager, service_pool
            for clazz in service_manager.mdx_services:
                service = service_pool.get(clazz.__unique__)
                title = service.builder._title if service and service.support else u''
                service_pool.put(service)
                if title.startswith(u'LDOCE6'):
                    dict_path = service.dict_path
                    break
        super(Ldoce6, self).__init__(dict_path)

    @property
    def title(self):
        """Return ``__register_label__`` if set, otherwise ``self.unique``."""
        return getattr(self, '__register_label__', self.unique)

    @export('PHON')
    def fld_phonetic(self):
        """Return the content of the first ``<span class="pron">``, or ''."""
        m = re.search(r'<span class="pron">(.*?)</span>', self.get_html())
        if m:
            return m.group(1)
        return ''

    def _fld_voice(self, html, voice):
        """Get the pronunciation audio field.

        Tries each pattern registered for ``voice`` ('br' or 'us') against
        ``html``. For every match the audio is stored through
        ``self.save_file``; the first one that yields a name is returned as
        an Anki audio label. Returns '' if none does.
        """
        for regexp in LANG_TO_REGEXPS[voice]:
            match = regexp.search(html)
            if not match:
                continue
            val = '/' + match.group(1)
            name = get_hex_name('mdx-' + self.unique.lower(), val, 'mp3')
            name = self.save_file(val, name)
            if name:
                return self.get_anki_label(name, 'audio')
        return ''

    @export('BRE_PRON')
    def fld_voicebre(self):
        """Return the audio label matched by the 'br' patterns."""
        return self._fld_voice(self.get_html(), 'br')

    @export('AME_PRON')
    def fld_voiceame(self):
        """Return the audio label matched by the 'us' patterns."""
        return self._fld_voice(self.get_html(), 'us')

    def _fld_image(self, img):
        """Save the image at path ``img`` and return its Anki image label, or ''."""
        val = '/' + img
        # The file extension isn't always jpg, so derive it from the path.
        file_extension = os.path.splitext(img)[1][1:].strip().lower()
        name = get_hex_name('mdx-' + self.unique.lower(), val, file_extension)
        name = self.save_file(val, name)
        if name:
            return self.get_anki_label(name, 'img')
        return ''

    @export('IMAGE')
    def fld_image(self):
        """Return the label for the first ``imgholder`` image, or ''."""
        m = re.search(r'<span class="imgholder"><img src="(.*?)".*?></span>',
                      self.get_html())
        if m:
            return self._fld_image(m.group(1))
        return ''

    @export('EXAMPLE')
    def fld_sentence(self):
        """Return the examples at indices 0-99 with their sound links stripped."""
        return self._range_sentence(list(range(100)))

    def _fld_audio(self, audio):
        """Save the mp3 at path ``audio`` and return its Anki audio label, or ''."""
        name = get_hex_name('mdx-' + self.unique.lower(), audio, 'mp3')
        name = self.save_file(audio, name)
        if name:
            return self.get_anki_label(name, 'audio')
        return ''

    @export([u'例句加音频', u'Examples with audios'])
    def fld_sentence_audio(self):
        """Return the audio-bearing examples at indices 0-99, each with its audio label."""
        return self._range_sentence_audio(list(range(100)))

    @export('DEF')
    def fld_definate(self):
        """Return every ``def`` span's content as ``<li>`` items, or ''."""
        definitions = self._span_contents('def')
        if not definitions:
            return ''
        return self._css(''.join('<li>' + text + '</li>' for text in definitions))

    @export([u'随机例句', u'Random example'])
    def fld_random_sentence(self):
        """Return one randomly chosen example."""
        return self._range_sentence()

    @export([u'首2个例句', u'First 2 examples'])
    def fld_first2_sentence(self):
        """Return the first two examples."""
        return self._range_sentence([0, 1])

    @export([u'随机例句加音频', u'Random example with audio'])
    def fld_random_sentence_audio(self):
        """Return one randomly chosen audio-bearing example with its audio label."""
        return self._range_sentence_audio()

    @export([u'首2个例句加音频', u'First 2 examples with audios'])
    def fld_first2_sentence_audio(self):
        """Return the first two audio-bearing examples with their audio labels."""
        return self._range_sentence_audio([0, 1])

    def _span_contents(self, css_class):
        """Return the inner HTML of each ``<span class=css_class>`` element.

        Only the first regex match over the entry HTML is parsed, as the
        original code did. Returns an empty list when nothing is found.
        """
        found = re.findall(self._SPAN_TEMPLATE % css_class, self.get_html())
        if not found:
            return []
        soup = parse_html(found[0])
        return [u''.join(str(content) for content in element.contents)
                for element in soup.findAll('span', {'class': css_class})]

    @staticmethod
    def _pick_indices(range_arr, count):
        """Return the indices to keep out of ``count`` items.

        A truthy ``range_arr`` is returned unchanged. Otherwise a single
        random index covering the whole list is chosen. The previous
        ``randrange(0, count - 1)`` raised ValueError for one item and could
        never select the last one. With no items the result is empty.
        """
        if range_arr:
            return range_arr
        if count <= 0:
            return []
        return [random.randrange(count)]

    def _range_sentence(self, range_arr=None):
        """Return the selected examples as ``<li>`` items without sound links.

        :param range_arr: indices of the examples to keep; when empty or
            None, one example is picked at random.
        :return: styled HTML, or '' when the entry has no examples.
        """
        examples = self._span_contents('example')
        if not examples:
            return ''
        wanted = self._pick_indices(range_arr, len(examples))
        items = [
            '<li>' + re.sub(self._SOUND_LINK, '', text).strip() + '</li>'
            for index, text in enumerate(examples)
            if index in wanted
        ]
        return self._css(''.join(items))

    def _range_sentence_audio(self, range_arr=None):
        """Return the selected examples as ``<li>`` items followed by audio labels.

        Only examples that contain a sound link are considered, and indices
        in ``range_arr`` refer to positions within that filtered list.

        :param range_arr: indices to keep; when empty or None, one example is
            picked at random.
        :return: styled HTML, or '' when the entry has no examples.
        """
        examples = self._span_contents('example')
        if not examples:
            return ''
        with_audio = []
        for text in examples:
            sound = re.search(self._SOUND_LINK_PATH, text)
            if sound:
                with_audio.append((sound.group(1), text))
        wanted = self._pick_indices(range_arr, len(with_audio))
        items = []
        for index, (audio_path, text) in enumerate(with_audio):
            if index not in wanted:
                continue
            # Save the audio first, then drop the original link from the text.
            mp3 = self._fld_audio(audio_path)
            text = re.sub(self._SOUND_LINK, '', text).strip()
            items.append('<li>' + text + ' ' + mp3 + '</li>')
        return self._css(''.join(items))

    @export([u'额外例句', u'Extra Examples'])
    def fld_extra_examples(self):
        """Return the concatenated content of every linked ``@examples_`` entry.

        Each ``href="/@examples_..."`` target in the entry is looked up via
        ``self.builder.mdx_lookup``. The results are joined after removing
        "\\r\\n" and "entry:/". Returns '' when there is no such link.
        """
        keys = re.findall(r'href="/(@examples_.*?)\">.*?<', self.get_html())
        if not keys:
            return ''
        str_content = u''
        for key in keys:
            for chunk in self.builder.mdx_lookup(key):
                str_content += chunk.replace("\r\n", "").replace("entry:/", "")
        return self._css(str_content)

    @with_styles(cssfile='_ldoce6.css')
    def _css(self, val):
        """Return ``val`` unchanged; ``with_styles`` is applied with ``_ldoce6.css``."""
        return val
---------------------

谢谢各位
' E5 x/ a2 W$ R; x
# y- ^9 A# G- L( ~( U7 ? |
|