It’s simple, but simple isn’t the same as easy

Old Man’s War (John Scalzi)

Tukaj sem nametal nekaj skriptov, za katere se mi zdi, da jih bom še potreboval.

RMD narediš lahko malo lepši s pomočjo teh navodil

Črtna koda iz PDF

Za čitanje PDF uporabim poppler (http://blog.alivate.com.au/poppler-windows/)

Črtno kodo čitam s pomočjo knjižnice ZBAR (https://sourceforge.net/projects/zbar/files/zbar/0.10/zbar-0.10-setup.exe/download).

S pip install pyzbar si namestiš pythonov vmesnik.


Deluje nekako tako:

  • prečitaš PDF

  • v io (pomnilnik) shraniš sliko

  • iz slike prečitaš črtno kodo

Pseudo koda:



  images = convert_from_path(PDF_File, poppler_path = POPPLER_PATH) # render the PDF pages to images
  png_bytes = io.BytesIO()                                          # in-memory buffer
  images[0].save(png_bytes,'PNG')                                   # write the first page into the buffer as PNG
  im = Image.open(png_bytes)                                        # load the buffer back as a PIL image
  barcode = pyzbar.decode(im)                                       # decode the barcode(s) in the image
  text = barcode[0].data.decode("utf-8")                            # take the first result and decode its payload as UTF-8
  

KLIKNI ZA KODO: Tukaj je celotna koda, vključno s tkinter GUI



# -*- coding: utf-8 -*-
"""
Created on Wed Oct  9 14:11:56 2019

Read page names from barcodes.

Each page of a PDF is rendered to an image, the barcode inside a fixed frame
is decoded, and the page is saved as its own PDF named after the barcode.
A small tkinter GUI is included.

@author: slanad

# INSTALL BEFORE FIRST USE
#    # ZBAR
#    # https://sourceforge.net/projects/zbar/files/zbar/0.10/zbar-0.10-setup.exe/download

#    # pip install pillow - version 4.0 may have to be installed
#    # pip install minecart
#    # pip install PyPDF2
#    # pip install pdf2Image
#    # https://blog.alivate.com.au/poppler-windows/
#    # Poppler for windows: http://blog.alivate.com.au/poppler-windows/ 
#    # The Poppler bin folder has to be reachable; the original note does this
#    # with sys.path.append("path to poppler-0.68.0/bin").
#    # NOTE(review): sys.path only affects Python module lookup - confirm whether
#    # the append is needed at all when poppler_path is passed explicitly.
#    # It also works when the Poppler location is given directly - see this call:
#    # images = convert_from_path(sFile, poppler_path = POPPLER_PATH)
"""

# %% Common routines
import os 
import sys
import datetime

import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
import subprocess

import io
from PIL import Image
from PyPDF2 import PdfFileWriter, PdfFileReader 
from pdf2image import convert_from_path
from pyzbar import pyzbar


#import pytesseract

SAVE_IMG_FLAG = True  # whether intermediate images are saved next to the PDF

# Machine-specific setup. POPPLER_PATH defaults to None so the name is always
# defined; pdf2image treats poppler_path=None as "find Poppler on the system PATH".
POPPLER_PATH = None

if os.path.exists('D:\\OneDrive'):
    # SLD at home
    POPPLER_PATH = 'D:\\OneDrive\\Dokumenti\\Python\\PdfOCR01\\poppler-0.68.0\\bin'
    sys.path.append(POPPLER_PATH)
    #sys.path.append('D:\\OneDrive\\Dokumenti\\Python\\PdfOCR01\\ZBar\\bin')

    #pytesseract.pytesseract.tesseract_cmd = r'C:\Users\Slana\AppData\Local\Tesseract-OCR\tesseract.exe'

elif os.path.exists('C:\\Users\\slanad\\OneDrive'):
    # SLD at work
    POPPLER_PATH = 'C:\\Users\\slanad\\OneDrive\\Dokumenti\\Python\\PdfOCR01\\poppler-0.68.0\\bin'
    # Same value the original appended; reusing the variable also removes the
    # invalid "\p" escape sequence that the original literal contained.
    sys.path.append(POPPLER_PATH)
    sys.path.append('C:\\Users\\slanad\\OneDrive\\Dokumenti\\Python\\PdfOCR01\\ZBar\\bin')
    #pytesseract.pytesseract.tesseract_cmd = r'C:\Users\slanad\AppData\Local\Tesseract-OCR\tesseract.exe'

# Check the prerequisites. The original ended this check with a bare `exit`,
# which is an expression and not a call, so execution always continued; that
# behaviour is kept so the GUI can still start and report the failure itself.
if POPPLER_PATH is None or not os.path.exists(POPPLER_PATH):
    print("NO POPPLER !")
    POPPLER_PATH = None  # fall back to whatever Poppler is on the system PATH

# Test data (module-level values for trying fPdfBAR1 by hand)
sFile = r"C:\Users\slanad\OneDrive\Dokumenti\Python\PdfOCR01\test\PrinterINST_3_CB.pdf"
# Barcode frame as fractions of the page size (pixel position / page size in pixels)
d1_w = 100/2340
d1_h = 700/3310
d2_w = 350/2340
d2_h = 1250/3310
DatumF="2019-10"
saveImg = SAVE_IMG_FLAG
# fPdfBAR1(sFile) # uncomment to test the function

#%%Precitaj PDF

def fPdfBAR1(sFile, DatumF="2019-10", d1_w=0.0437, d1_h=0.2115, d2_w=0.1496, d2_h=0.3776, saveImg = SAVE_IMG_FLAG):
    """Split a PDF into single-page PDFs named after the barcode on each page.

    Every page is rendered to an image, cropped to the barcode frame, rotated
    by 90 degrees, converted to grayscale and decoded with pyzbar. Each page is
    then written next to the input file as ``<DatumF>_<decoded text>.pdf``,
    e.g. ``2019-10_MFC04.pdf``.

    The frame is given as fractions of the page size (w = width, h = height)
    and is passed to ``Image.crop`` as (d1_w, d1_h, d2_w, d2_h) scaled to pixels.
    The original notes list this frame for the maintenance printer sheet:
    d1_w = 2045/2340, d1_h = 855/3310, d2_w = 2177/2340, d2_h = 1290/3310.

    Args:
        sFile: path of the PDF to split.
        DatumF: prefix of the output file names, e.g. "2019-10".
        d1_w, d1_h: first corner of the frame (fractions of width / height).
        d2_w, d2_h: second corner of the frame (fractions of width / height).
        saveImg: when true, the full page image and the cropped frame are
            also saved as PNG files next to the input file.

    Returns:
        True when all pages were written, False when anything failed (the
        error is printed to the console).
    """
    sPath = os.path.dirname(sFile)  # output is written next to the input file

    try:
        images = convert_from_path(sFile, poppler_path=POPPLER_PATH)

        # Crop the small frame on every page and collect the decoded names.
        ime = []  # one output name per page, in page order
        for i, img in enumerate(images, start=1):
            # Save the page to memory as PNG and load it back as a PIL image.
            png_bytes = io.BytesIO()
            img.save(png_bytes, 'PNG')
            im = Image.open(png_bytes)

            if saveImg:
                im.save(os.path.join(sPath, "PNG%s" % i + ".png"))

            width, height = im.size
            im = im.crop((d1_w * width, d1_h * height, d2_w * width, d2_h * height))
            im = im.rotate(90, expand=True)
            im = im.convert('L')  # L <- grayscale

            barcode = pyzbar.decode(im)
            try:
                text = barcode[0].data.decode("utf-8")
            except (IndexError, UnicodeDecodeError):
                # Nothing was decoded in the frame, or the payload is not UTF-8:
                # fall back to a placeholder name so the page is still exported.
                text = "Napaka" + str(i)

            print(text)

            if text != "":
                ime.append(text)
                if saveImg:
                    im.save(os.path.join(sPath, "izsek%s" % i + ".png"))
            else:
                # Empty payload: use a placeholder name and always keep the
                # cropped frame so the problem can be inspected.
                ime.append("Napaka_stran%s " % i)
                im.save(os.path.join(sPath, "napaka%s" % i + ".png"))
            print(i, text)

        # Write every page as its own PDF. The input stays open only for the
        # duration of the split and is closed even when writing fails.
        with open(sFile, "rb") as pdf_stream:
            inputpdf = PdfFileReader(pdf_stream)
            for page_no in range(inputpdf.numPages):
                output = PdfFileWriter()
                output.addPage(inputpdf.getPage(page_no))
                sFileNew = os.path.join(sPath, DatumF + "_" + ime[page_no] + ".pdf")
                with open(sFileNew, "wb") as outputStream:
                    output.write(outputStream)

        return True
    except Exception as err:
        # Callers only check the boolean result; print the cause so a failure
        # is not completely silent.
        print("fPdfBAR1 failed:", err)
        return False

#%%TKINTER
class MainApplication(tk.Frame):
    """Small tkinter front end for fPdfBAR1.

    Shows an entry for the month prefix, a checkbox that controls whether
    helper images are saved, and a button that asks for a PDF, splits it and
    opens the folder that contains the results.
    """

    def __init__(self, parent, *args, **kwargs):
        """Build the widgets inside *parent* and preset the month entry."""
        tk.Frame.__init__(self, parent, *args, **kwargs)
        self.parent = parent

        self.vnos_text = tk.StringVar()  # month prefix, e.g. 2019-03
        self.SaveImageFlag = tk.IntVar()  # 1 when helper images should be saved
        self.sDatoteka = tk.StringVar()  # location of the file to add

        # Default month: today's date moved back by 27 days (about one month).
        now = datetime.datetime.now() - datetime.timedelta(days=27)
        self.vnos_text.set(now.strftime("%Y-%m"))  # pattern 2019-03

        # Widgets are created first and packed separately: pack() returns None,
        # so chaining it would leave the attributes without a widget reference.
        self.mesec = tk.Label(parent, text="Mesec: ")
        self.mesec.pack(side="top")

        self.vnos = tk.Entry(parent, textvariable=self.vnos_text)
        self.vnos.pack(side="top")

        self.chBox1 = tk.Checkbutton(parent, text="Slike se shranijo?", variable=self.SaveImageFlag,
                                     onvalue=1, offvalue=0, height=2,
                                     width=20,
                                     command=self.fSaveImageFlag_state)
        self.chBox1.pack(side="top")

        self.btn1 = tk.Button(parent, height=2, width=35, bg="#995599", font=("Helvetica", 14),
                              text="  BAR, razstavi, preimenuj, shrani   ",
                              command=self.fPdfOCRsave)
        self.btn1.pack(side="bottom")

    def fSaveImageFlag_state(self):
        """Return True when the "save images" checkbox is ticked, else False."""
        return self.SaveImageFlag.get() == 1

    def fPdfOCRsave(self):
        """Ask for a PDF, split it with fPdfBAR1 and open the output folder."""
        self.sFile = filedialog.askopenfilename(initialdir="/", title="Select file",
                                                filetypes=(("pdf", "*.pdf"), ("all files", "*.*")))
        if not self.sFile:
            # Dialog was cancelled: there is nothing to process.
            return

        try:
            DatumF = str(self.vnos_text.get())  # e.g. '2019-06'

            retVal = fPdfBAR1(self.sFile, DatumF=DatumF, d1_w=0.0437, d1_h=0.2115,
                              d2_w=0.1496, d2_h=0.3776, saveImg=self.fSaveImageFlag_state())
            if not retVal:
                messagebox.showinfo("Information", " NEUSPESNO! ")

            # Show the folder that holds the input file and the generated files.
            sPath = os.path.abspath(os.path.dirname(self.sFile))
            print(sPath)
            # Argument list instead of a shell string, so folders with spaces work.
            subprocess.call(["explorer", sPath])

        except Exception:
            messagebox.showwarning("Information", " NAPAKA! ")

#%% MAIN
if __name__ == "__main__":
    # Create the top-level window, mount the application frame in it and
    # hand control to the tkinter event loop.
    main_window = tk.Tk()
    main_window.title("Dodajanje datotek")

    app_frame = MainApplication(main_window)
    app_frame.pack(side="top", fill="both", expand=True)

    main_window.mainloop()


  


Zagon python skriptov

V Windows končnica .py običajno ni nastavljena tako, da bi zagnala Python, ampak se odpre urejevalnik besedil. To je OK, saj tako ne zaženeš nečesa po naključju (npr. kakega podprograma). Da pa lahko Python skript zaženem z dvoklikom, uporabljam ali CMD ali VBS.

KLIKNI ZA KODO: Primer zagona s CMD

Narediš datoteko s končnico *.CMD (npr. zagon.cmd) .



REM Switch to the drive and folder that contain the script.
Y:
cd "Y:\xx\xxx"

REM Activate the Anaconda environment, then run the script with its python.exe.
call C:\Users\slanad\AppData\Local\Continuum\anaconda3\Scripts\activate.bat C:\Users\slanad\AppData\Local\Continuum\anaconda3
python.exe SCRIPT_00.py

REM Keep the window open until a key is pressed, then close it.
pause
exit
  

KLIKNI ZA KODO: Primer zagona z VBS

Narediš datoteko s končnico *.CMD (npr. ED.cmd), v kateri nastaviš, kako boš zaganjal VBS. V osnovi lahko VBS zaženeš direktno, samo potem ne veš, ali se bo izvršil 32-bitni ali 64-bitni; to je odvisno od nastavitve v računalniku.



@ECHO OFF
Echo "START SCRIPT ime"

REM Print an empty line; a bare ECHO would print the echo state ("ECHO is off.") instead.
ECHO.
ECHO "ZAGANJAM SKRIPT 32 bitno"
REM C:\Windows\SysWOW64 contains the 32-bit CScript.exe on 64-bit Windows.
REM The default cscript did not work on some machines and with adodb.connect,
REM so the script host is called by its full path to pin the bitness.
CD .\xxxxx\
C:\Windows\SysWOW64\CScript.exe "ED.vbs"
CD ..
REM Append a timestamped line to the log file.
ECHO 'Logiranje '; %DATE%; %TIME% >>ETLcmd_log.txt

Echo "END"

REM Keep the window from closing immediately
timeout 10
REM or use PAUSE instead

exit
  

Naslednji del pa je, da pripraviš datoteko VBS (npr. ED.vbs) in z njo zaženeš pythonove skripte. VBS script ima določene prednosti pri nekaterih opravilih v windowsih, zato je včasih kombinacija smiselna. Če ne drugje, ko krpaš skupaj kaj novega in obstoječega.


'Zagon python skripta z dvoklikom na datoteko .vbs
'Slana 2018-12-11

sPath = SetPath() 'Folder that contains this script, with a trailing backslash
pPath = PythonPath() 'Full path of python.exe (stays Empty when it is not found)


'NOTE(review): sPath is not referenced anywhere else in the visible script, so
'SCRIPT.py is resolved relative to the current working directory - confirm intent.
PythonScript("SCRIPT.py")

Function SetPath()

    'Return the folder that contains the running script. A trailing backslash
    'is appended so that file names can be concatenated directly.
    Dim fileSystem, scriptFolder

    Set fileSystem = CreateObject("Scripting.FileSystemObject")
    scriptFolder = fileSystem.GetParentFolderName(WScript.ScriptFullName)

    SetPath = scriptFolder & "\"

End Function


Function PythonPath()

    'Return the location of python.exe when the file exists there. Otherwise
    'show a message and leave the return value unassigned.
    Const PYTHON_EXE = "C:/Users/slanad/AppData/Local/Continuum/anaconda3/Python.exe"
    Dim fileSystem

    Set fileSystem = CreateObject("Scripting.FileSystemObject")

    If Not fileSystem.FileExists(PYTHON_EXE) Then
        WScript.Echo "Could not find path"
        Exit Function
    End If

    PythonPath = PYTHON_EXE

End Function

Sub PythonScript(ScriptName)

    'Start ScriptName with the interpreter stored in the global pPath, inside
    'a cmd window, without waiting for the script to finish.
    Set winShell = CreateObject("WScript.Shell")
    WaitOnReturn = False 'Run returns immediately instead of waiting for the process
    windowStyle = 1 'Window style passed to Run (1 = normal, activated window)

    'Define the command to run the python file and exit when done
    'NOTE(review): neither pPath nor ScriptName is quoted, so a path containing
    'spaces would be split by cmd - confirm before using such paths.
    command1 = pPath & " " & ScriptName
    command2 = "exit"

    'Run the commands: "cmd /k" runs command1, then "exit" closes the window
    Call winShell.Run("cmd /k " & command1 & " & " & command2, windowStyle, WaitOnReturn)

End Sub


Vrži v mapo in procesiraj (DROP TO FOLDER & PROCESS)

To se mi zdi uporabno namesto raznih vmesnikov, kjer izbiraš, katero datoteko boš obdelal. V tem primeru odložiš datoteko v točno definirano mapo, tam jo program prevzame, preveri, obdela in premakne v mapo, kjer so že obdelane datoteke. V nadaljevanju so zakomentirane še opcije, kako narediš urnik obdelav ali pa kako izvedeš paralelno procesiranje.

KLIKNI ZA KODO: DROP TO FOLDER & PROCESS

Potrebuješ mapi DROP in PROCESSED.



#https://github.com/satssehgal/Excel_Data_Analysis_Scheduler/blob/master/ExcelCron/myutil.py
#Najprej naredis mapo DROP
#Potem rabis mapo PROCESSED

# to potrebujem
import shutil
import os
import pandas as pd
from openpyxl import load_workbook
import schedule
import time

#Najprej kontroliras ali je kaj v mapi DROP

def job(folder_dropped='drop', folder_processed='processed', koncnica_datoteke='.txt'):
    """Process every matching file in the drop folder and archive the successes.

    Each file in *folder_dropped* whose name ends with *koncnica_datoteke* is
    handed to ``fToDo``. When ``fToDo`` returns a true value the file is moved
    to *folder_processed*; otherwise it stays in the drop folder and is picked
    up again on the next run.

    Args:
        folder_dropped: folder that is checked for new files.
        folder_processed: folder that receives the finished files.
        koncnica_datoteke: file-name suffix of the files to process.

    Returns:
        Always True.
    """
    # Create both folders when they are missing, so a first run (or a run
    # started by the scheduler) does not fail on a missing directory.
    os.makedirs(folder_dropped, exist_ok=True)
    os.makedirs(folder_processed, exist_ok=True)

    # Sorted so that the processing order does not depend on the file system.
    files_droped = sorted(
        file for file in os.listdir(folder_dropped)
        if file.endswith(koncnica_datoteke)
    )

    for file in files_droped:
        # Pass the path including the drop folder: the bare file name would
        # only resolve when the current directory is the drop folder itself.
        dropped_path = os.path.join(folder_dropped, file)
        if fToDo(dropped_path):
            shutil.move(dropped_path, os.path.join(folder_processed, file))
    return True


def fToDo(sFile):
    """Handle one dropped file; return True when it may be moved away.

    Placeholder: put the real processing of *sFile* here (e.g. read the
    file). Once it returns True, ``job`` moves the file to the processed
    folder.
    """
    print(sFile)
    return True


# Run once immediately when the script is started.
job()

# This is where you plan when the function 'job' should run
#schedule.every().day.at("10:30").do(job)

# schedule.every(10).minutes.do(job)
# schedule.every().hour.do(job)
# schedule.every().day.at("10:30").do(job)
# schedule.every(5).to(10).minutes.do(job)
# schedule.every().monday.do(job)
# schedule.every().wednesday.at("13:15").do(job)
# schedule.every().minute.at(":17").do(job)

# while True:
#     schedule.run_pending()
#     time.sleep(10) # time in seconds

# Running jobs in parallel:
#https://schedule.readthedocs.io/en/stable/faq.html#how-to-execute-jobs-in-parallel



# import time
 
# def sleeper():
#     while True:
#         # Get user input
#         num = input('How long to wait: ')
 
#         # Try to convert it to a float
#         try:
#             num = float(num)
#         except ValueError:
#             print('Please enter in a number.\n')
#             continue
 
#         # Run our time.sleep() command,
#         # and show the before and after time
#         print('Before: %s' % time.ctime())
#         time.sleep(num)
#         print('After: %s\n' % time.ctime())
 
 
# try:
#     sleeper()
# except KeyboardInterrupt:
#     print('\n\nKeyboard exception received. Exiting.')
#     exit()