{"id":484,"date":"2021-07-21T07:15:01","date_gmt":"2021-07-21T06:15:01","guid":{"rendered":"https:\/\/www.oliver-frick.ch\/?p=484"},"modified":"2021-07-21T07:15:01","modified_gmt":"2021-07-21T06:15:01","slug":"read-utf-8-csv-file-with-perl","status":"publish","type":"post","link":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/2021\/07\/21\/read-utf-8-csv-file-with-perl\/","title":{"rendered":"read UTF-8 csv file with Perl"},"content":{"rendered":"<pre>use warnings;\r\nuse strict;\r\n\r\nmy $filename = $ARGV[0] or die \"Need to get CSV file on the command line\\n\";\r\nmy $outfile = $ARGV[1] or die \"Need to get CSV file on the command line\\n\";\r\n\r\nopen(FH, '&lt;:encoding(utf8)', $filename) or die $!;\r\nopen(FHOUT, '&gt;:encoding(UTF-8)', $outfile) or die $!;\r\n\r\nmy $char;\r\nmy $read;\r\nmy $infield = 0;\r\nmy $column = 0;\r\nmy $row = 0;\r\nmy $value = \"\";\r\n\r\nwhile($read = read FH, $char, 1){\r\n  if ( $char eq \"'\" &amp;&amp; $infield == 0 ) {\r\n    # field begin\r\n    $infield = 1;\r\n    $column++;\r\n  } elsif ( $char eq \"'\" &amp;&amp; $infield == 1 ) {\r\n    # field end\r\n    $infield = 0;\r\n    print \"(\".$value.\")\";\r\n    print FHOUT $value;\r\n    $value = \"\";\r\n  } elsif ( $char eq \"\\n\" &amp;&amp; $infield == 0 ) {\r\n    # end of record\r\n    $column = 0;\r\n    $row++;\r\n    print FHOUT \"\\n\";\r\n  } elsif ( $char eq \",\" &amp;&amp; $infield == 0 ) {\r\n    # comma while not in field = separator of col\r\n    print FHOUT \"\\t\";\r\n  } else {\r\n    # its a value\r\n    $value = $value . $char;\r\n  }\r\n #   my @fields = split \",\" , $_;\r\n #  ( $fields[0] &amp;&amp; print $fields[0] ) || print \"\";\r\n}\r\n\r\nclose(FHOUT);\r\nclose(FH);<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>use warnings; use strict; my $filename = $ARGV[0] or die &#8222;Need to get CSV file on the command line\\n&#8220;; my $outfile = $ARGV[1] or die &#8222;Need to get CSV file on the command line\\n&#8220;; open(FH, &#8218;&lt;:encoding(utf8)&#8216;, $filename) or die $!; &hellip; <a href=\"https:\/\/oliver-frick.ch\/wordpress\/index.php\/2021\/07\/21\/read-utf-8-csv-file-with-perl\/\">Weiterlesen <span class=\"meta-nav\">&rarr;<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[],"tags":[],"class_list":["post-484","post","type-post","status-publish","format-standard","hentry"],"_links":{"self":[{"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/posts\/484","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/comments?post=484"}],"version-history":[{"count":1,"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/posts\/484\/revisions"}],"predecessor-version":[{"id":485,"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/posts\/484\/revisions\/485"}],"wp:attachment":[{"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/media?parent=484"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/categories?post=484"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/oliver-frick.ch\/wordpress\/index.php\/wp-json\/wp\/v2\/tags?post=484"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}