% scancsv.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 26. 2. 2005                      Petr Olsak
%
% This is a macro for processing the CSV format in plain TeX.
%
% The input file is assumed to be in the form:
%
%   "header1";"header2";"header3"; ... "header-n"
%   "text 1,1";"text 1,2";"text 1,3"; ... "text 1,n"
%   "text 2,1";"text 2,2";"text 2,3"; ... "text 2,n"
%   "text 3,1";"text 3,2";"text 3,3"; ... "text 3,n"
%   ...
%
% You can process such a file by
%
%   \input scancsv.tex
%   \def\lineaction{...}
%   \scanbase soubor          (soubor = name of the input file)
%
% The surrounding character " can be omitted in an arbitrary header or
% item text. I.e. the following format is scanned without problems:
%
%   "firstname";"lastname";number;RC
%   "Ferdinand";"Mravenec";1;000111/2244
%   "Josef";"Vykutal";2;444333/2211
%
% The macro reads the headers and then reads the next lines repeatedly.
% The fully expandable macro \e is ready after the line is read.
% More exactly, \e[header] expands to the appropriate item body.
% The \lineaction macro is processed after each line is read.
% It is supposed that \lineaction is defined by the user.
%
% The next line of the table is read after \lineaction, the \e macros have
% a new meaning (items from this next line) and the \lineaction is executed
% again. This is repeated until the last line of the input table is reached.
% Moreover, the \numline register is available, where the number of the
% last scanned line is stored.
%
% Example:
%
%   \input scancsv
%
%   \newcount \mylines
%   \def\bb #1 #2/{\hbox to#1{#2\hss}}
%
%   \def\printaction{\global\advance\mylines by1 %% \scanbase works
%      \hbox{%                                   %% inside a TeX group
%         \bb 2em \the\numline./
%         \bb 26em \e[subject]/
%         \bb 10em \e[lastname] \e[firstname]/
%         \bb 3em \hfill\e[pay2002]/
%         \bb 3em \hfill\e[pay2001]/}
%   }
%   \def\lineaction{\if K\e[member_type]% Institutional members
%                      \printaction
%                   \else \if G\e[member_type]% High school
%                      \printaction
%                   \fi\fi % I am printing institutional members and high schools only
%   }
%   \scanbase database1
%   \scanbase database2
%   {\it Number of printed lines: \the\mylines}.
%   \end
%
% If the \lineaction macro isn't defined by the user then \scanbase uses
% its own (default) macro which prints all items from one line
% into one paragraph in a compressed form (you can try this).
%
% The \scanbase macro opens a TeX group, then runs \beginhook,
% then reads the headers, then reads the lines and processes \lineaction
% repeatedly, then runs \endhook and finally closes the group.
% The default value of \beginhook and \endhook is \relax but the user
% can define something else.
%
% The user can redefine \separator and \obklopeni (the surrounding character)
% after \input scancsv if characters other than the semicolon
% and " are used in the input file.
% ----------------------------------------------------------------------
% User-tunable delimiters (may be redefined after \input scancsv).
% ----------------------------------------------------------------------
\def\separator{;}     % separator between items
\def\obklopeni{"}     % optional character surrounding an item

\newcount\colnum      % index of the column currently being scanned
\newcount\numline     % advanced by one for every data line processed

% ^^X is made an active character expanding to nothing.  \scanbase sets
% \endlinechar to ^^X, so every line read from the data file ends with
% this token and the macros below use it as their end-of-line delimiter.
\catcode`\^^X=13 \def^^X{}

% \printall typesets column names in \seventt.  This file does not define
% that font and it is not among the fonts predefined by plain TeX, so a
% fallback is provided only when nobody has defined it yet.
\ifx\seventt\undefined \font\seventt=cmtt8 at 7pt \fi

% ----------------------------------------------------------------------
% Header line
% ----------------------------------------------------------------------
% \scanheader looks at the first token of the next header cell;
% \doheader then picks the quoted (\maskheader) or unquoted
% (\nomaskheader) reader.  Both readers are defined inside \scanbase.
\def\scanheader{\futurelet \nextchar \doheader}
\def\doheader {\expandafter\ifx \obklopeni\nextchar
                  \expandafter \maskheader
            \else \expandafter \nomaskheader
            \fi}

% \runheader stores the header text (in \itemdata) of column n:
%   \csname c:<n>\endcsname      -> name of the column
%   \csname e:<name>\endcsname   -> (initially empty) value of the column
% If the name was already used, the column is named <name>:<n> instead.
\def\runheader{\advance\colnum by1
   \expandafter \ifx \csname e:\itemdata\endcsname \relax
      \expandafter \edef \csname c:\the\colnum\endcsname{\itemdata}%
      \expandafter \def \csname e:\itemdata\endcsname {}%
   \else
      \expandafter \edef \csname c:\the\colnum\endcsname{\itemdata:\the\colnum}%
   \fi
   \futurelet \nextchar \testnextchar
}
% End of the header line reached (^^X) or another header cell follows?
\def\testnextchar{\ifx\nextchar^^X\let\next=\ignorethirdline
   \else \let\next=\scanheader \fi \next
}
% Removes the ^^X that terminated the header line, remembers the number
% of columns and starts reading data lines.  \futurelet makes TeX fetch
% the next token before \runfirstitem starts scanning its argument, so a
% file holding nothing but the header line ends quietly (the same
% technique is used in \runline).
\def\ignorethirdline ^^X{\edef\maxcolumn{\the\colnum}%
   \futurelet \nextchar \runfirstitem
}

% ----------------------------------------------------------------------
% Data lines
% ----------------------------------------------------------------------
% \runfirstitem receives one complete line as #1:
%   * \stopmark (placed after \input by \scanbase): all input is consumed;
%   * empty line: stop reading the file (\endinput) and move on to the
%     \stopmark WITHOUT running \lineaction for the blank line;
%   * otherwise: append a separator and ^^X and scan the items.
\def\runfirstitem #1^^X{\def\tmp{#1}%
   \ifx\tmp\stopmark
      \let\next=\relax
   \else \ifx\tmp\empty
      \endinput
      \def\next{\futurelet \nextchar \runfirstitem}%
   \else
      \colnum=0
      \edef\next{\noexpand\scanitem #1\separator\noexpand^^X}%
   \fi\fi
   \next}

% \scanitem / \doitem: same dispatch as for header cells, but for items.
\def\scanitem {\futurelet \nextchar \doitem}
\def\doitem {\expandafter \ifx \obklopeni\nextchar
                  \expandafter \maskitem
            \else \expandafter \nomaskitem
            \fi}
% \runitem stores the item text (in \itemdata) as the value of the
% current column, i.e. in \csname e:<name of column n>\endcsname.
\def\runitem {\advance\colnum by1
   \expandafter\edef\csname e:\csname c:\the\colnum\endcsname\endcsname{\itemdata}%
   \futurelet \nextchar \testnextitem
}
% End of the data line reached (^^X) or another item follows?
\def\testnextitem{\ifx\nextchar^^X\let\next=\runline
   \else \let\next=\scanitem \fi \next
}
% A whole line has been stored: count it, run the user hook and go on
% with the next line.
\def\runline ^^X{\advance\numline by1
   \lineaction
   \futurelet \nextchar \runfirstitem
}

% ----------------------------------------------------------------------
% User interface
% ----------------------------------------------------------------------
% \e[<header>] gives the item of the current line in the column <header>;
% a warning is written to the terminal/log if no such column is known.
\def\e [#1]{\expandafter\ifx \csname e:#1\endcsname \relax
      \message{Warning: the #1 column is not defined in header.}%
   \else \csname e:#1\endcsname
   \fi
}
% Default \lineaction: prints "name:value" for columns 1..\maxcolumn,
% comma separated, as one ragged-right paragraph with hanging indentation.
\def\printall{\colnum = 0 \noindent \hangindent=\parindent \raggedright
   \loop
      \advance\colnum by1
      {\seventt \ignorespaces \csname c:\the\colnum\endcsname:}\penalty0
      \csname e:\csname c:\the\colnum\endcsname\endcsname
      \ifnum\colnum < \maxcolumn , \repeat
   .\par
}
\let\lineaction=\printall

% \scanbase <filename><space>
% Opens a group, builds the cell readers from the current \separator and
% \obklopeni, gives the \dospecials characters category 12 (the space
% gets category 10), runs \beginhook, reads the header line and all data
% lines of the file, runs \endhook and closes the group.
% The line ends inside the definition are protected by % so that no space
% tokens are typeset when \scanbase is called in horizontal mode.
\def\scanbase #1 {\begingroup
   \colnum=0
   \endlinechar=`\^^X
   \edef\maskauvo{\obklopeni####1\obklopeni\separator}%
   \edef\maskaneuvo{####1\separator}%
   \expandafter\def \expandafter\maskheader \maskauvo {\def\itemdata{##1}\runheader}%
   \expandafter\def \expandafter\nomaskheader \maskaneuvo {\def\itemdata{##1}\runheader}%
   \expandafter\def \expandafter\maskitem \maskauvo {\def\itemdata{##1}\runitem}%
   \expandafter\def \expandafter\nomaskitem \maskaneuvo {\def\itemdata{##1}\runitem}%
   \edef\scanfirstline ##1^^X{\noexpand\scanheader##1\separator\noexpand^^X}%
   \def\do##1{\catcode`##1=12 }\dospecials \catcode`\ =10
   \beginhook
   \expandafter \scanfirstline \input #1 \relax^^X\endhook
   \endgroup}

\def\stopmark{\relax}  % what \runfirstitem sees when the file is exhausted
\let\beginhook=\relax \let\endhook=\relax

\endinput

% (The Czech version of the documentation follows.)
%
% Makro na zpracovani databasovych vystupu ve formatu CSV pro plain.
%
% Srovnejte tez makro scanbase.tex
%
% Nacitane soubory se predpokladaji ve tvaru:
%
%   "zahlavi1";"zahlavi2";"zahlavi3"; ... "zahlavi-n"
%   "text 1,1";"text 1,2";"text 1,3"; ... "text 1,n"
%   "text 2,1";"text 2,2";"text 2,3"; ... "text 2,n"
%   "text 3,1";"text 3,2";"text 3,3"; ... "text 3,n"
%   ...
%
% Na takovy soubor je mozno po
%
%   \input scancsv.tex
%
% aplikovat makro \scanbase takto:
%
%   \scanbase soubor
%
% Obklopujici znak " muze u libovolneho zahlavi nebo polozky
% chybet. Tj. je korektni treba i takovy zapis dat:
%
%   "jmeno";"prijmeni";cislo;RC
%   "Ferdinand";"Mravenec";1;000111/2244
%   "Josef";"Vykutal";2;444333/2211
%
% Makro nacte zahlavi a zacne cist jednotlive radky. Po precteni
% kazdeho radku je obsah polozky pripraven v expanznim makru
% \e. Presneji \e[zahlavi] expanduje na text odpovidajici polozky.
% V teto situaci \scanbase spusti makro \lineaction, ktere si muze
% uzivatel definovat jak chce.
%
% Po ukonceni makra \lineaction cte scanbase dalsi radek tabulky, naplni
% znovu expanzni makra \e texty polozek z tohoto radku a spusti znovu
% \lineaction. To se opakuje tak dlouho, dokud neni ukonceno cteni
% tabulky.
% Navic je makru \lineaction k dispozici registr \numline
% obsahujici cislo prave precteneho radku.
%
% Priklad pouziti:
%
%   \input scancsv
%
%   \newcount \mylines
%   \def\bb #1 #2/{\hbox to#1{#2\hss}}
%
%   \def\printaction{\global\advance\mylines by1 %% \scanbase pracuje
%      \hbox{%                                   %% uvnitr skupiny!
%         \bb 2em \the\numline./
%         \bb 26em \e[nazev]/
%         \bb 10em \e[prijmeni] \e[jmeno]/
%         \bb 3em \hfill\e[kc2002]/
%         \bb 3em \hfill\e[kc2001]/}
%   }
%   \def\lineaction{\if K\e[typ_clenstvi]% Kolektivni clenove
%                      \printaction
%                   \else \if G\e[typ_clenstvi]% Gymnazia
%                      \printaction
%                   \fi\fi % tisknu jen kolektivni cleny a gymnazia
%   }
%   \scanbase database1
%   \scanbase database2
%   {\it Number of printed lines: \the\mylines}.
%   \end
%
% Pokud neni uzivatelem definovano makro \lineaction, pouzije
% \scanbase sve vlastni (defaultni) makro, ktere vytiskne vsechny polozky
% jednoho radku do odstavce ve velmi zhustenem tvaru (vyzkousejte si).
%
% Kazde \scanbase vstupuje do skupiny, pak spusti \beginhook,
% pak cte hlavicku a jednotlive radky, jak bylo receno vyse,
% pak spusti \endhook a nakonec vyleze ze skupiny.
% Sekvence \beginhook a \endhook muze predefinovat uzivatel, defaultne maji
% hodnotu \relax
%
% Uzivatel muze po nacteni \input scancsv predefinovat makra \separator
% a \obklopeni (viz vyse), pokud jsou v datech polozky a zahlavi
% oddeleny jinymi znaky nez strednik a symbol palce.