言語商会

Python

相関係数

import pandas as pd
import numpy as np
# Draw two independent samples of ten random integers from [0, 10);
# l1 is drawn first, then l2.
l1, l2 = (list(np.random.randint(0, 10, 10)) for _ in range(2))

# Wrap each sample in a Series so pandas can correlate them.
s1, s2 = pd.Series(l1), pd.Series(l2)

# Correlation coefficient between the two samples (pandas' default method).
res = s1.corr(s2)
print(res)

よくあるパターン

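毎回一から書くのは面倒なので、よく使うパターンを列挙します。以下は、行ごとに対応する2つのテキストファイル（original.txt と simplified.txt）を SudachiPy で分かち書きし、100行ごとに別々のファイルへ出力する例です。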

#!/usr/bin/python3
# -*- coding:utf-8 -*-
#
 
from sudachipy import tokenizer
from sudachipy import dictionary
 
def wakati( sentence ):
  """Return *sentence* tokenized, with the token surfaces joined by spaces.

  Uses the module-level ``tokenizer_obj`` with split mode C.
  """
  split_mode = tokenizer.Tokenizer.SplitMode.C
  tokens = tokenizer_obj.tokenize(sentence, split_mode)
  surfaces = [token.surface() for token in tokens]
  return " ".join(surfaces)
 
# Tokenize two line-aligned text files: line k of 'original.txt' is paired
# with line k of 'simplified.txt'. The space-separated results are collected
# in original_sep and simplified_sep, one entry per line of 'original.txt'.
original_sep = []
simplified_sep = []
tokenizer_obj = dictionary.Dictionary().create()

# Both inputs are opened in a single `with` so that the simplified file is
# closed too (it used to be opened bare and never closed). The encoding is
# given explicitly to match the UTF-8 declared in the script header and used
# for the output files, instead of depending on the platform default.
# NOTE(review): assumes both input files are UTF-8 encoded - confirm.
with open('original.txt', encoding='UTF-8') as original_file, \
     open('simplified.txt', encoding='UTF-8') as simplified_file:
  for original_line in original_file:
    original_line = original_line.strip()
    # readline() returns '' at end of file, so if the simplified file is
    # shorter the remaining original lines are paired with empty strings.
    simplified_line = simplified_file.readline().strip()

    original_sep.append(wakati(original_line))
    simplified_sep.append(wakati(simplified_line))
 
# Write the tokenized pairs to numbered files, LINES_PER_FILE pairs per file.
# Each pair is written as three lines: the global index, the original
# sentence, and the simplified sentence.
LINES_PER_FILE = 100

for start in range(0, len(original_sep), LINES_PER_FILE):
  stop = start + LINES_PER_FILE
  # `with` closes every output file, including the final one when the total
  # is not a multiple of LINES_PER_FILE (previously that file was left open).
  # NOTE(review): 'secion' looks like a typo for 'section'; the file name is
  # kept unchanged because other tooling may depend on it.
  with open('outfile-secion%s.txt' % (start // LINES_PER_FILE), mode='w', encoding='UTF-8') as outfile:
    pairs = zip(original_sep[start:stop], simplified_sep[start:stop])
    for i, (original, simplified) in enumerate(pairs, start=start):
      print(i, file=outfile)
      print(original, file=outfile)
      print(simplified, file=outfile)
 (感想・要望・情報提供)