digital literacy for everyone
[lit][generate-title]
[lit]
[fig]
# 2018 mn
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# print the program banner, then an empty line
proginf "findsim 0.1, nov 2017 mn" print
now "" print
# thresholds read by the scan and comparison code further down
min 75 # percentage of similarity to consider relevant
max 32768 # largest size of file to consider
python
from os import path
from hashlib import sha256
fig
# take the command-line arguments (fig command) and join them with spaces
# into one string; getlist later pastes this string into a find command
cmd command join cmd " "
# getlist: run find with -type f through the shell and return the result as an array.
# cmd: string placed between find and -type f; when its length is 0 the
# plain find -type f command is used instead.
# NOTE(review): cmd is concatenated into the shell command unquoted.
function getlist cmd
find "find -type f"
cmdlen cmd len
iftrue cmdlen
find "find " plus cmd plus " -type f"
fig
# NOTE(review): quot is assigned here but never used inside this function
quot 34 chr
# run the command with arrshell and keep its output in p
p find arrshell
# keep all but the last element of p before returning it
# (presumably the last element is an empty trailing entry - TODO confirm)
x p len minus 1
p left x return p
fig
# size: return the size of the file at path p as an integer,
# using os.path.getsize from the inline python section.
# the first fig below ends the inline python, the second ends the function.
function size p
python
return int(path.getsize(p))
fig
fig
# sha256sum: return the sha256 hex digest of the contents of the file at path p.
# the file is opened in binary mode so the digest covers the exact bytes on disk,
# and inside a with statement so the handle is closed as soon as it has been read.
# the with statement is kept on one line so the inline python needs no nested indentation.
# the first fig below ends the inline python, the second ends the function.
function sha256sum p
python
with open(p, "rb") as f: return sha256(f.read()).hexdigest()
fig
fig
# files, filen, filec and fileh are filled in step below, one entry per accepted
# file, so the same index refers to the same file in each of them.
# each array starts out empty.
files arr mid 1 0 # sizes
filen arr mid 1 0 # names
filec arr mid 1 0 # contents
fileh arr mid 1 0 # hashes
filed arr mid 1 0 # done already
filelist arr mid 1 0 # output
# scan every path returned by getlist
p getlist cmd
forin f p
# progress line: name, then size, then either a skip notice or the sha256 hash
now f prints " " prints
fs size f prints
now " " prints
ifmore fs max
# file is larger than max: report it and record nothing
now "max is " prints get max prints ", skipping" print
else
filen plus f
files plus fs
now sha256sum f print
fileh plus now
# read the file, join it into one space-separated string, split that on
# spaces into tokens and sort the tokens
contentarr arropen f join contentarr " " split contentarr " " arrsort
content ""
oldc ""
# rebuild the content from the sorted tokens, leaving out empty tokens and
# any token equal to the one kept just before it
forin c contentarr
iftrue c
ifequal c oldc
# repeat of the previous token: nothing to add, this line only fills the branch
c oldc
else
now content plus c plus " " swap now content
# remember the token just kept; without this oldc stays empty and the
# duplicate check above can never succeed
oldc c
fig
fig
next
filec plus content
fig
next
# getp: return an integer similarity percentage for two token strings.
# x, y: strings of tokens separated by single spaces.
# the result is the average of two ratios, times 100 and passed through int:
#   tokens of x counted as present in y, divided by the token count of x
#   tokens of y counted as present in x, divided by the token count of y
function getp x y
px split x " "
py split y " "
xlen px len
ylen py len
# a line holding only a name leaves that variable at its starting value;
# xc and yc are the match counters (fig starts each line variable at 0 - confirm)
xc
yc
# b holds the most recently counted token
b ""
forin p px
# c is true when p is found in py
c instr py p
# a token equal to the one counted last is not counted again
ifequal p b
c 0
fig
iftrue c
now xc plus 1 swap now xc
b p
fig
next
xper xc divby xlen
# same pass in the other direction
# NOTE(review): b is not reset here, so it still holds the last token counted above
forin p py
c instr px p
ifequal p b
c 0
fig
iftrue c
now yc plus 1 swap now yc
b p
fig
next
yper yc divby ylen
now xper plus yper divby 2 times 100 int return now
fig
# print the array of accepted file names, then compare every pair of them
filen print
# quot is the character with code 34, used to wrap names in the pair labels
quot 34 chr
filenlen filen len
for x 1 filenlen 1
for y 1 filenlen 1
a arrget filen x
b arrget filen y
# pair labels in both orders; fwd is stored in filed, rev is looked up there
fwd quot plus a plus quot plus " " plus quot plus b plus quot
rev quot plus b plus quot plus " " plus quot plus a plus quot
# skip the pair if it was already handled in the opposite order
already instr filed rev
# a file is never compared with itself
ifequal a b
already 1
fig
iftrue already
# nothing to do for a skipped pair, this line only fills the branch
already 1
else
filed plus fwd
# contents, sizes and hashes of both files
ca arrget filec x
cb arrget filec y
cas arrget files x
cbs arrget files y
cah arrget fileh x
cbh arrget fileh y
ifequal ca cb
# identical token content: report 100, the pair and both sizes
now 100 prints " " prints
now fwd prints " # " prints
now cas prints " " prints
ifequal cah cbh
# hashes are equal too: add the match note and the hash, using colourtext 8
# for the note and colourtext 7 afterwards
now cbs prints colourtext 8 " match: " plus cah prints colourtext 7 "" print
else
now cbs print
fig
else
# otherwise score the pair with getp and report it only when it reaches min
per getp ca cb
ifless per min
# below the threshold: nothing is printed, this line only fills the branch
now fwd
else
now per prints " " prints
now fwd prints " # " prints
now cas prints " " prints
now cbs print
fig
fig
# NOTE(review): counting openers and closers, this section ends one closer
# short, so the outer for x block appears to stay open to the end of the
# file - confirm this is intended
next
next
home: [lit]https://freesoftwareresistance.neocities.org[lit]