[SLD] ab uno disce omnes »po enem spoznaj vse«
Contents
It’s simple, but simple isn’t the same as easy
Old Man’s War (John Scalzi)
Tukaj sem nametal nekaj skriptov, za katere se mi zdi, da jih bom še potreboval.
RMD narediš lahko malo lepši s pomočjo teh navodil
Črtna koda iz PDF
Za čitanje PDF uporabim poppler (http://blog.alivate.com.au/poppler-windows/)
Črtno kodo čitam s pomočjo knjižnice ZBAR (https://sourceforge.net/projects/zbar/files/zbar/0.10/zbar-0.10-setup.exe/download).
S pip install pyzbar si namestiš pythonov vmesnik.
Deluje nekako tako:
prečitaš PDF
v io (pomnilnik) shraniš sliko
iz slike prečitaš črtno kodo
Pseudo koda:
images = convert_from_path(PDF_File, poppler_path = POPPLER_PATH) # render the PDF pages into a list of images
png_bytes = io.BytesIO() # in-memory buffer, used instead of a file on disk
images[0].save(png_bytes,'PNG') # write the first page into the buffer in PNG format
im = Image.open(png_bytes) # load the picture back from the buffer as a PIL image
barcode = pyzbar.decode(im) # scan the image for barcodes
text = barcode[0].data.decode("utf-8") # take the first barcode found and decode its payload to text
KLIKNI ZA KODO: Tukaj je celotna koda, vključno s tkinter GUI
KLIKNI ZA KODO: Tukaj je celotna koda, vključno s tkinter GUI
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 9 14:11:56 2019
Preberi z bar code
@author: slanad
# NAMESTI PRED ZAČETKOM
# #ZBAR
# #https://sourceforge.net/projects/zbar/files/zbar/0.10/zbar-0.10-setup.exe/download
# # pip install pillow - mogoče bo treba namestiti verzijo 4.0
# # pip install minecart
# # pip install PyPDF2
# # pip install pdf2Image
# # https://blog.alivate.com.au/poppler-windows/
# # Poppler for windows: http://blog.alivate.com.au/poppler-windows/
# # bin moraš imeti v poti, to narediš s sys.path.append("pot do poppler-0.68.0/bin")
# # deluje tudi ce dolocis v poti pot do popplerja - glej spodaj ukaz
# # images = convert_from_path(sFile, poppler_path = POPPLER_PATH)
"""
# %% Common routines
import os
import sys
import datetime
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
import subprocess
import io
from PIL import Image
from PyPDF2 import PdfFileWriter, PdfFileReader
from pdf2image import convert_from_path
from pyzbar import pyzbar
#import pytesseract
SAVE_IMG_FLAG = True  # whether intermediate images are saved next to the PDF

# Machine-specific setup: choose the Poppler "bin" folder depending on which
# OneDrive root exists on this computer.
# NOTE(review): sys.path only influences Python module lookup, while the
# Poppler tools are located through the poppler_path argument -- confirm that
# the sys.path.append calls are still needed.
POPPLER_PATH = ""  # stays empty when no known machine layout is detected
if os.path.exists('D:\\OneDrive'):
    # SLD at home
    POPPLER_PATH = 'D:\\OneDrive\\Dokumenti\\Python\\PdfOCR01\\poppler-0.68.0\\bin'
    sys.path.append(POPPLER_PATH)
    #sys.path.append('D:\\OneDrive\\Dokumenti\\Python\\PdfOCR01\\ZBar\\bin')
    #pytesseract.pytesseract.tesseract_cmd = r'C:\Users\Slana\AppData\Local\Tesseract-OCR\tesseract.exe'
elif os.path.exists('C:\\Users\\slanad\\OneDrive'):
    # SLD at work
    POPPLER_PATH = 'C:\\Users\\slanad\\OneDrive\\Dokumenti\\Python\\PdfOCR01\\poppler-0.68.0\\bin'
    sys.path.append(POPPLER_PATH)
    sys.path.append('C:\\Users\\slanad\\OneDrive\\Dokumenti\\Python\\PdfOCR01\\ZBar\\bin')
    #pytesseract.pytesseract.tesseract_cmd = r'C:\Users\slanad\AppData\Local\Tesseract-OCR\tesseract.exe'

# Check the preconditions: without Poppler no PDF can be rendered, so stop
# here (a bare ``exit`` is only a name lookup and does not terminate anything).
if not os.path.exists(POPPLER_PATH):
    print("NO POPPLER !")
    sys.exit(1)

# Test data
sFile = r"C:\Users\slanad\OneDrive\Dokumenti\Python\PdfOCR01\test\PrinterINST_3_CB.pdf"
# Crop box expressed as fractions of the page width (w) and height (h)
d1_w = 100/2340
d1_h = 700/3310
d2_w = 350/2340
d2_h = 1250/3310
DatumF = "2019-10"
saveImg = SAVE_IMG_FLAG
# fPdfBAR1(sFile)  # for testing the function
#%%Precitaj PDF
def fPdfBAR1(sFile, DatumF="2019-10", d1_w=0.0437, d1_h=0.2115, d2_w=0.1496, d2_h=0.3776, saveImg = SAVE_IMG_FLAG):
    """Split a PDF into one-page PDFs named after the barcode on each page.

    Every page is rendered to an image, cropped to the box described by the
    ``d1_*``/``d2_*`` fractions, rotated by 90 degrees, converted to grayscale
    and scanned with pyzbar. Each page is then written next to the input file
    as ``<DatumF>_<decoded text>.pdf``, e.g. ``2019-10_MFC04.pdf``.

    Parameters:
        sFile: path of the PDF to split.
        DatumF: prefix of every output file name.
        d1_w, d1_h: left and upper edge of the crop box, as fractions of the
            page width and height.
        d2_w, d2_h: right and lower edge of the crop box, as fractions of the
            page width and height.
        saveImg: when true, the full page image (``PNG<n>.png``) and the
            cropped area (``izsek<n>.png``) are saved as well.

    Returns:
        True when all pages were written, False when any step raised.
    """
    # Alternative crop box for the maintenance printer sheet (w = width, h = height):
    # d1_w = 2045/2340
    # d1_h = 855/3310
    # d2_w = 2177/2340
    # d2_h = 1290/3310
    sPath = os.path.dirname(sFile)  # all output goes next to the input file
    try:
        images = convert_from_path(sFile, poppler_path = POPPLER_PATH)
        ime = []  # one output name per page, in page order
        for i, img in enumerate(images, start=1):
            # Round-trip through an in-memory PNG.
            # NOTE(review): probably redundant, pdf2image appears to return
            # PIL images already -- confirm before removing.
            png_bytes = io.BytesIO()
            img.save(png_bytes, 'PNG')
            im = Image.open(png_bytes)
            if saveImg:
                im.save(os.path.join(sPath, "PNG%s" % i + ".png"))
            width, height = im.size
            im = im.crop((d1_w * width, d1_h * height, d2_w * width, d2_h * height))
            im = im.rotate(90, expand=True)
            im = im.convert('L')  # L <- grayscale
            barcode = pyzbar.decode(im)
            try:
                text = barcode[0].data.decode("utf-8")
            except (IndexError, UnicodeDecodeError):
                # no barcode in the crop, or its payload is not valid UTF-8
                text = "Napaka" + str(i)
                print(text)
            #text = pytesseract.image_to_string(im)
            if text != "":
                ime.append(text)
                if saveImg:
                    im.save(os.path.join(sPath, "izsek%s" % i + ".png"))
            else:
                ime.append("Napaka_stran%s " % i)
                im.save(os.path.join(sPath, "napaka%s" % i + ".png"))
            print(i, text)
        # The reader pulls page data from the stream while pages are written,
        # so the input stays open for the whole loop and is closed afterwards.
        with open(sFile, "rb") as pdf_stream:
            inputpdf = PdfFileReader(pdf_stream)
            for page_no in range(inputpdf.numPages):
                output = PdfFileWriter()
                output.addPage(inputpdf.getPage(page_no))
                sFileNew = os.path.join(sPath, DatumF + "_" + ime[page_no] + ".pdf")
                with open(sFileNew, "wb") as outputStream:
                    output.write(outputStream)
        return True
    except Exception as err:
        # Callers only look at the boolean result; print the reason so that a
        # failure can be diagnosed from the console.
        print("fPdfBAR1 failed:", err)
        return False
#%%TKINTER
class MainApplication(tk.Frame):
    """Small Tk form for splitting a PDF by barcode.

    Shows an entry for the month prefix, a checkbox that controls whether
    helper images are saved, and a button that asks for a PDF and runs
    ``fPdfBAR1`` on it.
    """

    def __init__(self, parent, *args, **kwargs):
        """Build the widgets inside ``parent`` and preset the month prefix."""
        tk.Frame.__init__(self, parent, *args, **kwargs)
        self.parent = parent
        self.vnos_text = tk.StringVar()  # month prefix, e.g. 2019-03
        self.SaveImageFlag = tk.IntVar()  # 1 when helper images should be saved
        self.sDatoteka = tk.StringVar()  # location of the file to add

        # Default prefix: year and month of the date 27 days ago
        now = datetime.datetime.now() - datetime.timedelta(days=27)
        datumF = "%d" % now.year + "-" + '{num:02d}'.format(num=now.month)  # pattern 2019-03
        self.vnos_text.set(datumF)

        # pack() returns None, so each widget is created first and packed in a
        # second step; that way the attributes refer to the real widgets.
        self.mesec = tk.Label(parent, text = "Mesec: ")
        self.mesec.pack(side="top")
        self.vnos = tk.Entry(parent, textvariable = self.vnos_text)
        self.vnos.pack(side="top")
        self.chBox1 = tk.Checkbutton(parent, text = "Slike se shranijo?", variable = self.SaveImageFlag,
                                     onvalue = 1, offvalue = 0, height=2,
                                     width = 20,
                                     command=self.fSaveImageFlag_state)
        self.chBox1.pack(side="top")
        self.btn1 = tk.Button(parent, height = 2, width = 35, bg="#995599", font=("Helvetica", 14),
                              text=" BAR, razstavi, preimenuj, shrani ",
                              command=self.fPdfOCRsave)
        self.btn1.pack(side="bottom")

    def fSaveImageFlag_state(self):
        """Return True when the 'save images' checkbox is ticked, else False."""
        return self.SaveImageFlag.get() == 1

    def fPdfOCRsave(self):
        """Ask for a PDF, split it with fPdfBAR1 and open its folder in Explorer."""
        self.sFile = filedialog.askopenfilename(initialdir = "/", title = "Select file",
                                                filetypes = (("pdf","*.pdf"),("all files","*.*")))
        if not self.sFile:
            return  # the dialog was cancelled, nothing to process
        try:
            DatumF = str(self.vnos_text.get())  # e.g. '2019-06'
            retVal = fPdfBAR1(self.sFile, DatumF=DatumF, d1_w=0.0437, d1_h=0.2115,
                              d2_w=0.1496, d2_h=0.3776, saveImg = self.fSaveImageFlag_state())
            if not retVal:
                messagebox.showinfo("Information", " NEUSPESNO! ")
            #messagebox.showinfo("Information", "Uspeno razstavljeno! ")
            sPath = os.path.abspath(os.path.dirname(self.sFile))  # folder of the chosen file
            print(sPath)
            # Argument list instead of a shell string, so a folder name that
            # contains spaces is passed to Explorer as one argument.
            subprocess.call(["explorer", sPath])
        except Exception:
            messagebox.showwarning("Information", " NAPAKA! ")
#%% MAIN
if __name__ == "__main__":
    # Script entry point: create the Tk window, mount the form in it and
    # hand control to the Tk event loop.
    window = tk.Tk()
    window.title("Dodajanje datotek")
    app = MainApplication(window)
    app.pack(side="top", fill="both", expand=True)
    window.mainloop()
Zagon python skriptov
V Windows končnica py ni nastavljena tako, da bi se zagnal Python, ampak se običajno odpre urejevalnik besedil. To je OK, saj tako ne zaženeš česa po naključju (npr. kakega podprograma). Da pa lahko zaženem python skript z dvoklikom, uporabljam ali CMD ali VBS.
KLIKNI ZA KODO: Primer zagona s CMD
KLIKNI ZA KODO: Primer zagona s CMD
Narediš datoteko s končnico *.CMD (npr. zagon.cmd) .
:: Switch to the drive that holds the script
Y:
:: Change into the folder of the script
cd "Y:\xx\xxx"
:: Activate the Anaconda environment located at the given path
call C:\Users\slanad\AppData\Local\Continuum\anaconda3\Scripts\activate.bat C:\Users\slanad\AppData\Local\Continuum\anaconda3
:: Run the Python script
python.exe SCRIPT_00.py
:: Wait for a key press so the output stays visible, then close
pause
exit
KLIKNI ZA KODO: Primer zagona z VBS
KLIKNI ZA KODO: Primer zagona z VBS
Narediš datoteko s končnico *.CMD (npr. ED.cmd), v kateri nastaviš, kako boš zaganjal VBS. V osnovi lahko zaženeš VBS direktno, samo potem ne veš, ali se bo izvršil 32-bitni ali 64-bitni; to je odvisno od nastavitve v računalniku.
@ECHO OFF
REM Launcher for ED.vbs: runs it with an explicitly chosen CScript host and logs the run.
Echo "START SCRIPT ime"
REM "ECHO." prints an empty line; a bare ECHO would print the echo state instead
ECHO.
REM The default CScript host failed on some computers and with adodb.connect, so the host is named explicitly.
REM NOTE(review): on 64-bit Windows the SysWOW64 folder holds the 32-bit binaries (the 64-bit host is in
REM System32), so this starts the 32-bit CScript - confirm that this is the host wanted here.
ECHO "ZAGANJAM SKRIPT 32 bitno"
CD .\xxxxx\
C:\Windows\SysWOW64\CScript.exe "ED.vbs"
CD ..
REM Append a time-stamped line to the log file
ECHO 'Logiranje '; %DATE%; %TIME% >>ETLcmd_log.txt
Echo "END"
REM Keep the window from closing immediately
timeout 10
REM or use PAUSE instead
exit
Naslednji del pa je, da pripraviš datoteko VBS (npr. ED.vbs) in z njo zaženeš pythonove skripte. VBS script ima določene prednosti pri nekaterih opravilih v windowsih, zato je včasih kombinacija smiselna. Če ne drugje, ko krpaš skupaj kaj novega in obstoječega.
'Launches a Python script when this .vbs file is double-clicked
'Slana 2018-12-11
sPath = SetPath() 'folder that contains this script, with a trailing backslash
pPath = PythonPath() 'full path of python.exe (stays Empty when the file is not found)
PythonScript("SCRIPT.py") 'run SCRIPT.py with that interpreter
Function SetPath()
    'Returns the folder that holds this script file, ending with "\"
    'so that a file name can be appended directly.
    Dim fileSystem, scriptFolder
    Set fileSystem = CreateObject("Scripting.FileSystemObject")
    scriptFolder = fileSystem.GetParentFolderName(WScript.ScriptFullName)
    SetPath = scriptFolder & "\"
End Function
Function PythonPath()
    'Returns the full path of the Python interpreter.
    'When the file does not exist a message is shown and the result stays Empty.
    Const INTERPRETER = "C:/Users/slanad/AppData/Local/Continuum/anaconda3/Python.exe"
    Dim fileSystem
    Set fileSystem = CreateObject("Scripting.FileSystemObject")
    If Not fileSystem.FileExists(INTERPRETER) Then
        WScript.Echo "Could not find path"
        Exit Function
    End If
    PythonPath = INTERPRETER
End Function
Sub PythonScript(ScriptName)
    'Runs ScriptName with the interpreter stored in the script-level variable pPath,
    'inside a visible cmd window that closes once the script has finished.
    Dim winShell, waitOnReturn, windowStyle, pythonCommand

    'Without an interpreter the command would consist of the script name only,
    'which would open the file through its association instead of running it.
    If IsEmpty(pPath) Or pPath = "" Then
        Exit Sub
    End If

    Set winShell = CreateObject("WScript.Shell")
    waitOnReturn = False 'do not block this script until the window closes
    windowStyle = 1 'show the window
    'Python command first, then "exit" so the cmd window closes when it is done
    pythonCommand = pPath & " " & ScriptName
    Call winShell.Run("cmd /k " & pythonCommand & " & " & "exit", windowStyle, waitOnReturn)
End Sub
Vrži v mapo in procesiraj (DROP TO FOLDER & PROCESS)
To se mi zdi uporabno namesto raznih vmesnikov, kjer izbiraš, katero datoteko boš obdelal. V tem primeru odložiš datoteko v točno definirano mapo, tam jo program prevzame, preveri, obdela in premakne v mapo, kjer so že obdelane datoteke. V nadaljevanju so zakomentirane še opcije, kako narediš urnik obdelav ali pa kako izvedeš paralelno procesiranje.
KLIKNI ZA KODO: DROP TO FOLDER & PROCESS
KLIKNI ZA KODO: DROP TO FOLDER & PROCESS
Potrebuješ mapi DROP in PROCESSED.
#https://github.com/satssehgal/Excel_Data_Analysis_Scheduler/blob/master/ExcelCron/myutil.py
#Najprej naredis mapo DROP
#Potem rabis mapo PROCESSED
# to potrebujem
import shutil
import os
import pandas as pd
from openpyxl import load_workbook
import schedule
import time
#Najprej kontroliras ali je kaj v mapi DROP
def job():
    """Process every matching file that waits in the drop folder.

    Each ``.txt`` file found in ``drop`` is handed to ``fToDo`` by its file
    name; when ``fToDo`` reports success the file is moved to ``processed``.
    Both folders are relative to the current working directory and are
    created when they do not exist yet, so a first run does not fail on a
    missing folder.

    Returns:
        True once all waiting files have been looked at.
    """
    koncnica_datoteke = '.txt'
    folder_processed = 'processed'  # finished files are moved here
    folder_dropped = 'drop'  # new files are picked up here
    #files_processed = [file for file in os.listdir(folder_processed) if file.endswith(koncnica_datoteke)]
    #path_processed = os.path.join(os.getcwd(),folder_processed)
    for folder in (folder_dropped, folder_processed):
        os.makedirs(folder, exist_ok=True)

    files_droped = [file for file in os.listdir(folder_dropped) if file.endswith(koncnica_datoteke)]
    # Hand every new file to fToDo; only successfully handled files are moved,
    # the others stay in the drop folder for the next run.
    for file in files_droped:
        if fToDo(file):
            shutil.move(os.path.join(folder_dropped, file), os.path.join(folder_processed, file))
    return True
def fToDo(sFile):
    """Placeholder for the per-file work: report the file name and signal success.

    Replace the body with the real processing of ``sFile`` (reading it and so
    on). ``job`` moves the file to the processed folder when this returns True.
    """
    print(sFile)
    done = True
    return done
# Run the job once, immediately, when the script is executed
job()
# Scheduling alternatives: choose here when the function 'job' should run
#schedule.every().day.at("10:30").do(job)
# schedule.every(10).minutes.do(job)
# schedule.every().hour.do(job)
# schedule.every().day.at("10:30").do(job)
# schedule.every(5).to(10).minutes.do(job)
# schedule.every().monday.do(job)
# schedule.every().wednesday.at("13:15").do(job)
# schedule.every().minute.at(":17").do(job)
# while True:
# schedule.run_pending()
# time.sleep(10) # time in seconds
# Running jobs in parallel:
#https://schedule.readthedocs.io/en/stable/faq.html#how-to-execute-jobs-in-parallel
# import time
# def sleeper():
# while True:
# # Get user input
# num = input('How long to wait: ')
# # Try to convert it to a float
# try:
# num = float(num)
# except ValueError:
# print('Please enter in a number.\n')
# continue
# # Run our time.sleep() command,
# # and show the before and after time
# print('Before: %s' % time.ctime())
# time.sleep(num)
# print('After: %s\n' % time.ctime())
# try:
# sleeper()
# except KeyboardInterrupt:
# print('\n\nKeyboard exception received. Exiting.')
# exit()
Author SlanaD
LastMod 2019-12-24