title Analyzing File name ('ANAL') maclib base80 ; Program simple reads file and displays character frequency ; Copyright (c) Werner Cirsovius ; Hohe Weide 44 ; D-20253 Hamburg ; Federal Republic of Germany ; Tel.: (+49)040/4223247 ; Version 1.0, January 2002 ; To get program running use: ; ; ANAL file {/{S}{D}{B}} ; ; One or more of the following options may be selected: ; ; /S if output should be sorted. ; /D if output should suppress asterisks. ; /B if file is to be treated as binary rather than ASCII. ; Technical hint: ; The calculation bases upon long integers (32 bit) with format: ; ; MSB, BYTE, BYTE, LSB ; ; This differs from the INTEL format storing 16 bit numbers: ; ; LSB, MSB MAXROW equ 24 ; Max number of terminal rows MAXCOL equ 80 ; Max number of terminal colums INTLEN equ 4 ; Length of 32 bit integer entry $memry ext strcn0,conout,conino,conchd,crlf,fidrus ext open,dskget ext add32,sub32,mul32,div32,dout32 ext ascbyt dseg $RULES: db 'Analyze (text) file' db cr,lf,lf db 'Call it: ANAL file {/{S}{D}{B}}' db cr,lf,lf db 'Select option:' db cr,lf db '/S if output should be sorted.' db cr,lf db '/D if output should suppress asterisks.' db cr,lf db '/B if file is to be treated as binary rather than ASCII.' db cr,lf,null $OPTS: db 'B','D','S' OPTLEN equ $-$OPTS OPTEXE: dw B.opt,D.opt,S.opt $NOFILE: db 'Cannot find file ',null StatPB: dw $ASCII,ASCIIset StatItm equ $-StatPB dw $CTRL,CTRLset dw $BIN,BINset StatLen equ $-StatPB $CHAR1: db 'Found ',null $CHAR2: db ' out of 256 possible combinations',cr,lf,null $TOTAL: db 'Total=',null $BYTES: db ' Bytes',null $ASCII: db 'ASCII=',null $CTRL: db ', Control=',null $BIN: db ', Binary=',null $READ: db 'Reading file...' db cr,lf,null $SORT: db 'Sorting data...' db cr,lf,null $STATISTIC: db 'Statistic',null $MORE: db '<< MORE >>',cr,null $NOMORE: db ' ',cr,null $BREAK: db 'User break' db cr,lf,null $HEX: db 'xx',null $BLNK: db ' ',null $ASCCHR: db '(x)',null filmode: db 0 opt: db 0 noasterisk: db 0 mx: db 0,0,0,0 TEN: db 0,0,0,10 TERMWIDTH: db 0,0,0,0 CHARset: db 0,0 ASCIIset: db 0,0,0,0 CTRLset: db 0,0,0,0 BINset: db 0,0,0,0 row: db 0 offset: dw 256/2 ; Half of byte combinations CURITM: db 0 ITEMS: ds 1 limit: ds 2 swapline: ds 2 line: ds 2 chb: ; \ ds 1 ; | chi: ; | ds 4 ; / chv: ds 4 dig: ds 1 data: ds 2 d.line: ds 2 d.offset: ds 2 heap: ds 2 $memry: ds 2 cseg ; ; Tell bad selection and exit ; BadCall: ld de,$RULES call strcn0 ; Tell rules jp OS ; And give up ; ; Tell file not found and exit ; BadFile: ld de,$NOFILE call strcn0 ; Tell file not found ld de,FCB call fidrus ; Tell file call crlf jp OS ; And give up ; ; Initialize memory ; Init: ld hl,($memry) ; Get pointer to first free address ld (data),hl xor a ld b,a ; ... is 256 for byte ld c,a ; Set index setdata: ld (hl),c ; Save character value inc hl ld (hl),a ; Clear value inc hl ld (hl),a inc hl ld (hl),a inc hl ld (hl),a inc hl inc c ; Next character djnz setdata ld (heap),hl ; Set pointer to free memory ret ; ; Test zero number ^IX - Z set says yes ; iszero: ld a,(ix+0) or (ix+1) ; Get result or (ix+2) or (ix+3) ret ; ; Calculate max from ^BC:^DE ; max: push bc ; Save pointers push de ld hl,(heap) ; Point to dummy area call sub32 ; Compare pop hl pop de ret nc ; Got max already ld bc,INTLEN ldir ; Unpack new max ret ; ; Update character counter from Accu ; Update: ld ix,CTRLset cp ' ' ; Test range jr c,inc32 ; 0x00..0x1F means control ld ix,ASCIIset ; 0x20..0x7E means ASCII cp '~'+1 jr c,inc32 ld ix,BINset ; 0x7F..0xFF means binary ; ; Increment data ^IX ; inc32: inc (ix+3) ; Start with LSB ret nz inc (ix+2) ; Remember carry ret nz inc (ix+1) ret nz inc (ix+0) ; End with MSB ret ; ; Build current data pointer ^HL from HL ; posdata: ld e,l ld d,h add hl,hl ; *5 (INTLEN+1) add hl,hl add hl,de ld de,(data) ; Get base of data add hl,de ; Position it ret ; ; Test if data to be swapped ; test.swap: ld hl,(line) call posdata ; Get first pointer ld (d.line),hl ld hl,(line) ld de,(offset) add hl,de call posdata ; Get 2nd pointer ld (d.offset),hl ld c,l ld b,h ld de,(d.line) inc bc inc de ld hl,(heap) ; Point to dummy area call sub32 ; Compare ret nc ; In range ld de,(heap) ; Swap values ld hl,(d.line) ld bc,INTLEN+1 ldir ld de,(d.line) ld hl,(d.offset) ld bc,INTLEN+1 ldir ld de,(d.offset) ld hl,(heap) ld bc,INTLEN+1 ldir ld hl,(line) ld (swapline),hl ; Remember position ret ; ; Go thru the data ; for: ld (line),hl ; Set index ld a,e or d ; Test loop done ret z push de call test.swap ; Test items to be swapped pop de ld hl,(line) inc hl dec de jr for ; ; Sort data - methode: shell sort ; ; Based upon a PASCAL program published ; in CPC 12'1991/1'1992 ; Sort: ld de,$SORT call strcn0 ; Tell action ld hl,(offset) ; Get start value while: ld a,l or h ret z ex de,hl ld hl,255 ; Set inner loop sbc hl,de ld (limit),hl repeat: ld hl,0 ld (swapline),hl ld de,(limit) bit 7,d call z,for ; Go thru loop if > 0 ld de,(offset) ld hl,(swapline) or a sbc hl,de ld (limit),hl add hl,de ld a,l or h jr nz,repeat ld hl,(offset) srl h ; Halve offset rr l ld (offset),hl jr while ; ; Write hex value ; Hex: ld de,$HEX call ascbyt ; Convert to ASCII dec de ; Position to start dec de call strcn0 ; Print hex ret ; ; Write ASCII value ; ASCII: ld de,strcn0 push de ; Set return address ld de,$BLNK cp ' ' ; Test printable ret c ; Nope cp '~'+1 ret nc ld de,$ASCCHR+1 ld (de),a ; Store character dec de ret ; Print it ; ; Write 32 bit integer value ^HL ; printeg: call num2str ; Convert number to string call strcn0 ; Print it ret ; ; Convert 32 bit integer value ^HL - Resulting string in ^DE ; num2str: ld de,(heap) ; Point to string area push de ld b,null ; Set end character call dout32 ; Convert to string pop de ret ; ; Write 32 bit integer value ^HL with fixed length ; prfinteg: call num2str ; Convert number to string push de or a sbc hl,de ; Get length of number ld a,(dig) sub l ; Get empty places ld b,a ld e,' ' call nz,conchd ; Give leading blanks if any pop de call strcn0 ; Print digit ret ; ; Write asterisks chi*TERMWIDTH/mx times ; ; [ ; mx -> TERMWIDTH ; 1 -> TERMWIDTH/mx ; chi -> chi*TERMWIDTH/mx ; ] ; asterisks: ld bc,chi ld de,TERMWIDTH ld hl,chv call mul32 ; Multiply ld bc,chv ld de,mx ld hl,chv call div32 ; Build count ld a,(chv+3) ; Get it ld b,a ld e,'*' inc b dec b call nz,conchd ; Output asterisks if any to be printed ret ; ; Give new line ; NL: call crlf ; Give line ld hl,row inc (hl) ; Advanve row ld a,MAXROW cp (hl) ; Compare ret nc ; Still in range ld (hl),0 ; Clear count ld de,$MORE call strcn0 ; Tell more pages call conino ; Wait for key push af ld de,$NOMORE call strcn0 pop af cp 'C'-'@' ; Test abort ret nz ; Nope ld de,$BREAK call strcn0 ; Tell abort jp OS ; And do it ; ; Dump data set ; dumpdata: ld hl,(CHARset) inc hl ; Bump character count ld (CHARset),hl ld hl,chb call Hex ; Write hex ld a,(chb) call ASCII ; Write ASCII ld a,'<' call conout ld hl,chi call prfinteg ; Print count ld a,'>' call conout ld a,':' call conout ld a,(noasterisk) or a ; Test asterisk to be given jr nz,deconly call asterisks ; Write asterisks call NL ; Give new line ret deconly: ld hl,CURITM inc (hl) ; Advance position ld a,(ITEMS) sub (hl) ; Test in range ret nz ; Yeap ld (hl),a ; Clear count call NL ; Give new line ret ; ; Set up terminal values ; ; It calculates max digits for numbers: ; ; dig = log(mx)+1 -> (ln(mx)/ln(10))+1 ; ; Then it calculates remaining terminal width for asterisk output: ; ; TERMWIDTH = MAXCOL-hex[2]-ascii[3]-(3+dig)[number] ; ; The value will be used as floating point value ; TERMSET: ld de,(heap) ; Get free ld hl,mx ld bc,INTLEN ldir ; Unpack it dig.get: ld hl,(heap) ; Get number ld c,l ld b,h ld de,TEN call div32 ; Divide by 10 ld hl,dig inc (hl) ; Bump digit count ld ix,(heap) ; Get remainder call iszero ; Test more jr nz,dig.get ld a,(dig) ; Get digit add a,2+3+3 ; Add for fix characters ld b,a ld a,MAXCOL sub b ; Get remaining places ld (TERMWIDTH+3),a inc b ; Fix for one more character ld c,-1 ld a,MAXCOL divCOL: sub b ; Divide column by by item inc c ; Bump quotient jr nc,divCOL ld a,c ld (ITEMS),a ; Save it ret ; ; Select option from ^HL ; getopt: ld e,000b ; Clear corresponding bits nxtopt: ld a,(hl) ; Get possible option cp ' ' ret z ; Nope, end push hl ld hl,$OPTS+OPTLEN-1 ld bc,OPTLEN cpdr ; Find option jp nz,BadCall ; Should be known ld hl,OPTEXE add hl,bc ; Position in table add hl,bc ld c,(hl) inc hl ld b,(hl) ld hl,OPTRET push hl push bc ret ; Go for option OPTRET: pop hl inc hl jr nxtopt ; ; Process option 'B' - Treat as binary ; B.opt: ld a,001b ld ix,filmode jr set.opt ; ; Process option 'D' - Suppress asterisks ; D.opt: ld a,010b ld ix,noasterisk jr set.opt ; ; Process option 'S' - Sort ; S.opt: ld a,100b ld ix,opt set.opt: ld b,a and e ; Test already set jp nz,BadCall ; Should not be more than once ld a,b or e ; Set bit ld e,a ld (ix),-1 ; Set flag ret ; ; Test end of ASCII file - C set says yes ; ASCIIend: ld l,a ; Save character ld a,(filmode) ; Get file mode sub -1 sub 1 ; Test binary ld a,l ccf ret nc ; Yeap cp eof ; Test logical end scf ret z ccf ; Set ok ret ; ; %%%%%%%%%%%%%%%%%%% ; %% ENTER PROGRAM %% ; %%%%%%%%%%%%%%%%%%% ; ANAL: ld sp,(TPAtop) ; Load stack ld a,(FCBnam) ; Get filename cp ' ' ; Verify name fiven jp z,BadCall ; Should be ld hl,FCBnm2 ; Point to option ld a,(hl) ; Get option prefix cp '/' inc hl call z,getopt ; Get option if any ld de,FCB call open ; Open source file jp c,BadFile ; Not found call Init ; Initialize memory ld de,$READ call strcn0 ; Tell reading rdfile: call dskget ; Read from file call nc,ASCIIend jr c,endfile ; End of file push af ld l,a ld h,0 ld e,l ld d,h add hl,hl ; *2 add hl,hl ; *4 add hl,de ; *5 ex de,hl ld ix,(data) ; Get base of data add ix,de ; Position in record inc ix call inc32 ; Increment data pop af call Update ; Update counter jr rdfile endfile: ld b,0 ; ... is 256 ld de,(data) inc de mxloop: push bc push de ld bc,mx call max ; Calculate max pop de ld hl,INTLEN+1 add hl,de ex de,hl pop bc djnz mxloop call TERMSET ; Set up terminal values ld a,(opt) or a ; Test sort call nz,Sort ; Yeap, do it ld b,0 ; ... is 256 ld hl,(data) analoop: push bc ld de,chb ld bc,INTLEN+1 ldir ; Unpack record push hl ld ix,chi call iszero ; Test zero count call nz,dumpdata ; Dump data if not pop hl pop bc djnz analoop call NL ld de,$STATISTIC call strcn0 ; Tell statistic follows call NL ld iy,StatPB ld b,StatLen/StatItm statloop: push bc ld e,(iy+0) ld d,(iy+1) call strcn0 ; Tell mode ld l,(iy+2) ld h,(iy+3) call printeg ; Print count ld bc,4 add iy,bc pop bc djnz statloop call NL ld bc,ASCIIset ld de,CTRLset ld hl,chv call add32 ; Build sum of all characters read call NL ld bc,BINset ld de,chv ld hl,chv call add32 ld de,$TOTAL call strcn0 ld hl,chv call printeg ; Print total count ld de,$BYTES call strcn0 call NL ld hl,0 ld (chv),hl ld hl,(CHARset) ld a,l ; Swap bytes ld l,h ld h,a ld (chv+2),hl ld de,$CHAR1 call strcn0 ld hl,chv call printeg ; Print character count ld de,$CHAR2 call strcn0 jp OS ; That's all end ANAL