TA的每日心情 | 擦汗 2021-11-17 09:18 |
---|
签到天数: 79 天 [LV.6]常住居民II
|
发表于 2021-10-6 09:52:48
|
显示全部楼层
(3) 在哪里加入：current_path = os.path.dirname(__file__); current_path + "/OALD4_azure.txt"
复制代码
我的版本是 3.9.7，目前没有遇到 No such file or directory 报错。然后是 genMDX_ox4.py 文件有部分中文乱码：
% {/ r5 Z, s4 F2 A( E2 S* r: K- # -*- coding: utf-8 -*-- v1 L. Y2 F! E2 \/ |' o/ n
- # encoding=utf8
: h$ y; J/ N# I6 H7 B# X0 z - * X: ?. t- K- S* h3 n7 r
- from __future__ import unicode_literals,print_function, absolute_import, division
$ ^+ U* s& u' Y" W1 |* L3 j
) ?9 x1 K; Q" V* A, S& M4 [* p( {- " ^! ^# I6 F& t* n5 U% f
- import re/ {& |" T( \; M7 `6 M1 `' ~
- import copy. k' n# h8 w% [; a9 I
- import chardet7 N' t9 q) h/ _5 B7 N: P
- , J$ g e% S5 O6 |+ J
- import os
" c, J6 ?% l. R/ e% T - import io& p7 s6 M6 j$ J% l
- import sys% P$ R' R7 B. w
- # reload(sys)
$ U) q1 S5 R5 A" ?: ?2 x - # sys.setdefaultencoding('utf-8')
+ @5 ~4 V9 J5 g5 j* h9 j. R - 1 m! O5 J; u9 H* x1 q
- import collections3 X0 j0 ^" b$ k9 q8 v
- from collections import defaultdict* _. c' a+ ]% [, s9 g% ~
- 8 M" { x$ {2 F7 X1 @' G$ `, s1 I
$ O, h/ I7 Q. P' \, \( l- from writemdict import MDictWriter, encrypt_key
( G* }4 O9 s/ s - from ripemd128 import ripemd1287 _! y; |7 O& U* n/ O7 X% k, R
- 9 r! o/ ~) h4 R, X- W4 w& U* \" Y
- " u( M9 A, Y0 g+ s6 N: [% F( l
# Line that terminates one entry in the source text.
ENTRY_END = '</>'

# Input/output locations, resolved relative to the current working directory.
SOURCE_PATH = 'OALD4_azure.txt'
ABOUT_PATH = 'about_OX4.txt'   # the about text is read as UTF-8
OUTPUT_PATH = "output_ox4/OALD4_Ex.mdx"

# NOTE(review): this title looks like mis-decoded text (possibly GBK bytes
# read as UTF-8). It is kept exactly as found; restore the intended title
# from a clean copy of the script.
DICT_TITLE = "ţ��߽�˫��(���İ�)"


def parse_entries(lines):
    """Group definition text by headword.

    The input is a sequence of text lines in which each entry consists of a
    headword line, zero or more definition lines, and a closing ``</>`` line.
    The definition lines of one entry are concatenated without a separator
    (only the trailing ``'\\n'`` of each line is stripped first).

    Args:
        lines: any iterable of strings, e.g. an open text file.

    Returns:
        A ``defaultdict(list)`` mapping each headword to the list of its
        concatenated definitions, in order of appearance. A headword that
        occurs several times gets several list items.
    """
    entries = defaultdict(list)
    # Parser state:
    #   0 - nothing seen yet; lines are ignored until the first delimiter
    #   1 - the next line is a headword
    #   2 - collecting definition lines for ``word``
    # NOTE(review): starting in state 0 means an entry that precedes the
    # first ``</>`` line is dropped. Confirm the source file begins with a
    # delimiter; otherwise the initial state should be 1.
    state = 0
    word = None
    meaning = []
    for line in lines:
        line = line.rstrip('\n')
        if line == ENTRY_END:
            if state == 2:
                entries[word].append("".join(meaning))
            state = 1
            meaning = []
        elif state == 1:
            word = line
            state = 2
        elif state == 2:
            meaning.append(line)
    return entries


def main():
    """Build the MDX dictionary file from the source and about texts."""
    # ``with`` guarantees every handle is closed, including on errors.
    with io.open(SOURCE_PATH, 'r', encoding='utf-8') as source:
        d = parse_entries(source)

    with io.open(ABOUT_PATH, 'r', encoding='utf-8') as about_file:
        about = about_file.read()

    with open(OUTPUT_PATH, "wb") as outfile:
        writer = MDictWriter(d, DICT_TITLE, about)
        writer.write(outfile)


if __name__ == "__main__":
    main()
复制代码
是否可以看看你的文档呢？乱码的部分中文写的是什么？ |
|