#!/usr/bin/perl
#
# Jeff Mock
# 2030 Gough
# San Francisco, CA 94109
# jeff@mock.com
# (c) 2004
#
#
# $Id: mkdlpf_coeff 242 2004-09-29 19:29:15Z jeff $
#
# Generates the Verilog coefficient module for a decimating low-pass
# filter.  With --dec=n a fixed ROM holding one slice of a (windowed)
# sinc function is written; with --vdec=n a writable coefficient RAM is
# written instead.  --gain only reports the mean DC gain and --table=s
# only dumps the quantized coefficients as hex, one per line.
#

use strict;
use warnings;

use Getopt::Long;
use Math::Trig qw(pi);      # only pi is needed; avoids tan/sinh/... clashes
use Math::BigInt;
use POSIX qw(ceil);         # only ceil is needed

# Window types accepted by --window
my %windows = (
    'none'     => 1,
    'rect'     => 1,
    'hamming'  => 1,
    'hanning'  => 1,
    'blackman' => 1,
    'bartlett' => 1,
    'tri'      => 1,
    'fft'      => 1,
);

# State shared between the subs below
my $mean_gain = 1.0;        # mean DC gain, set by dlpf_coeff_vals()
my $g_gain    = 0.0;        # scale factor quoted in the module header
my $inst_num  = 0;          # running BRAM instance number (m00, m01, ...)

# Command line options and their defaults
my $opt_width  = 18;        # datapath width
my $opt_dec    = 0;         # filter decimation (length of table)
my $opt_vdec   = 0;         # max variable filter decimation (length of table)
my $opt_dir    = ".";
my $opt_odir   = ".";
my $opt_prefix = "jdlp";
my $opt_norun  = 0;
my $opt_imp    = "virtex2";
my $opt_start  = 2;
my $opt_stop   = 1;
my $opt_fwid   = 4;
my $opt_window = "none";
my $opt_idx    = 0;
my $opt_narrow = 1.00;
my $opt_gain   = 0;
my $opt_table  = "";

# Default --dir to the directory this script was started from.
if ($0 =~ m{(.*)/.*}) {
    $opt_dir = $1 eq "" ? "." : $1;
}

# binmode(STDIN, ":bytes") if $] >= 5.008;

my %opts = (
    'width=o'  => \$opt_width,
    'dec=o'    => \$opt_dec,
    'vdec=o'   => \$opt_vdec,
    'dir=s'    => \$opt_dir,
    'odir=s'   => \$opt_odir,
    'prefix=s' => \$opt_prefix,
    'norun'    => \$opt_norun,
    'imp=s'    => \$opt_imp,
    'start=f'  => \$opt_start,
    'stop=f'   => \$opt_stop,
    'fwid=o'   => \$opt_fwid,
    'window=s' => \$opt_window,
    'idx=o'    => \$opt_idx,
    'narrow=f' => \$opt_narrow,
    'gain'     => \$opt_gain,
    'table=s'  => \$opt_table,
);

# Sorted so the usage text is the same on every run.
my $wins = join ' ', sort keys %windows;

if (!GetOptions(%opts)) {
    print STDERR "
Generate pieces of verilog for a decimating LPF

    mkdlpf_coeff [options]
        [--width=n]     Datapath width of FFT ($opt_width)
        [--dec=n]       Length of table ($opt_dec)
        [--vdec=n]      max length of variable table ($opt_vdec)
        [--dir=s]       Directory with other mkdlpf programs ($opt_dir)
        [--odir=s]      Output directory for verilog ($opt_odir)
        [--prefix=s]    Prefix module name with string ($opt_prefix)
        [--norun]       Do not recurse and build sub-modules
        [--imp=s]       Set target implementation ($opt_imp)
        [--start=f]     Start postion (x pi) in sinc
        [--stop=f]      Stop position (x pi) in sinc
        [--fwid=n]      Total width of filter (x pi)
        [--window=s]    Windowing function: ${wins}
        [--idx=n]       Index to add to end of name
        [--narrow=f]    Compress sync function ($opt_narrow)
        [--gain]        Just report filter gain
        [--table=s]     Just make table of coefficients
\n";
    exit 1;
}

die "Window type ${opt_window} not defined" unless $windows{$opt_window};

# pcode($fd, $sp, $code)
#
# Print a block of template text to filehandle $fd.  The text is written
# in the source as a multi-line string that starts right after the
# opening quote: the (empty) first line is dropped, $sp leading spaces
# are removed from every line that has them, and trailing spaces at the
# very end (the indentation of the closing quote) are removed.
#
sub pcode {
    my ($fd, $sp, $code) = @_;

    $code =~ s/^.*?\n//m;
    $code =~ s/^ {$sp}//mg;
    $code =~ s/ *$//;
    print {$fd} $code;
    return;
}

# log2($v)
#
# Returns the number of address bits needed to index $v entries, i.e.
# ceil(log2($v)) for an integer $v >= 1.  Dies for anything smaller
# instead of failing inside log().
#
sub log2 {
    my $v = shift;

    die "log2: argument must be a number >= 1"
        unless defined $v && $v >= 1;
    return int(log(2*$v-1)/log(2));
}

# _bram_init_strings($vals, $bitpos, $uwid, $wid, $ninit)
#
# Build the 256-bit INIT/INITP parameter strings for one section (main
# or parity) of one block RAM.  From every table entry in @$vals the
# $uwid bits starting at bit $bitpos are taken and packed, little-endian,
# into slots of $wid bits.  At most $ninit strings are produced; packing
# stops as soon as every table entry has been placed.  Returns the list
# of 64-digit hex strings (without a 0x prefix).
#
sub _bram_init_strings {
    my ($vals, $bitpos, $uwid, $wid, $ninit) = @_;
    my $n = @$vals;

    # Mask selecting the $uwid bits this section holds
    my $mask = Math::BigInt->new(1);
    $mask->blsft($uwid);
    $mask->bdec();

    my @strs = ();
    my $tidx = 0;
    for my $init (0 .. $ninit-1) {
        my $iv = Math::BigInt->new(0);
        for my $pos (0 .. 256/$wid-1) {
            my $tv = $$vals[$tidx]->copy;
            $tv->brsft($bitpos);
            $tv->band($mask);
            $tv->blsft($pos*$wid);
            $iv->bior($tv);
            $tidx++;
            last if $tidx >= $n;
        }
        my $str = $iv->as_hex;
        $str =~ s/^0x//;
        $str = ("0" x (64-length($str))) . $str if length($str) < 64;
        push @strs, $str;
        last if $tidx >= $n;
    }
    return @strs;
}

# _print_defparams($fd, $bname, $param, $strs)
#
# Print one simulation-only defparam statement that sets ${param}_00,
# ${param}_01, ... of instance $bname to the strings in @$strs.  Entries
# are separated by commas and the last one is closed with a semicolon.
#
sub _print_defparams {
    my ($fd, $bname, $param, $strs) = @_;

    printf {$fd} "%sdefparam\n", " " x 4;
    for my $init (0 .. $#{$strs}) {
        printf {$fd} "%s%s.%s_%02X = 256'h%s%s\n",
            " " x 8, $bname, $param, $init, $$strs[$init],
            $init == $#{$strs} ? ";" : ",";
    }
    return;
}

# _print_synth_attrs($fd, $bname, $param, $strs)
#
# Print the same initialization strings as "synthesis attribute"
# comments, which is how they are handed to the synthesis tool.
#
sub _print_synth_attrs {
    my ($fd, $bname, $param, $strs) = @_;

    for my $init (0 .. $#{$strs}) {
        printf {$fd} "%s// synthesis attribute %s_%02X of %s is \"%s\"\n",
            " " x 4, $param, $init, $bname, $$strs[$init];
    }
    return;
}

# _bram_port_nets($fd, $bus, $bname, $tag, $mbitpos, $muw, $mwid,
#                 $pbitpos, $puw, $pwid)
#
# Work out what to connect to the data-out and parity-out pins of one
# port of block RAM $bname.  $bus is the output bus name, $muw/$puw are
# the main/parity bits of this RAM that are really used (starting at bus
# bits $mbitpos/$pbitpos) and $mwid/$pwid are the physical pin widths.
# Where a pin is wider than the used bits, a dummy wire is declared on
# $fd for the unused upper bits and concatenated in.  $tag is "" for
# port A and "b" for port B and only affects the dummy wire names
# (dnc_/pnc_ versus dbnc_/pbnc_).
#
# Returns (data-out expression, parity-out expression); the parity
# expression is "" when the RAM has no parity pins.
#
sub _bram_port_nets {
    my ($fd, $bus, $bname, $tag, $mbitpos, $muw, $mwid,
        $pbitpos, $puw, $pwid) = @_;

    my $do = sprintf("%s[%d:%d]", $bus, $mbitpos+$muw-1, $mbitpos);
    if ($mwid > $muw) {
        my $dummy = "d${tag}nc_${bname}";
        printf {$fd} "    wire [%d:0] %s;\n", $mwid-$muw-1, $dummy;
        $do = "{${dummy}, ${do}}";
    }

    my $pdo = "";
    if ($pwid > 0) {
        my $dummy = "p${tag}nc_${bname}";
        if ($puw > 0) {
            $pdo = sprintf("%s[%d:%d]", $bus, $pbitpos+$puw-1, $pbitpos);
            if ($pwid > $puw) {
                printf {$fd} "    wire [%d:0] %s;\n", $pwid-$puw-1, $dummy;
                $pdo = "{${dummy}, ${pdo}}";
            }
        }
        else {
            # Parity pins exist but none are used: tie them to a dummy.
            printf {$fd} "    wire [%d:0] %s;\n", $pwid-1, $dummy;
            $pdo = $dummy;
        }
    }
    return ($do, $pdo);
}

# virtex2_bram($fd, $ports, $addr, $data, $awidth, $dwidth, $vals, $cmts,
#              $addrb, $datab)
#
# Emit Verilog on $fd that implements a ROM with Virtex-II RAMB16
# primitives.
#
#   $ports          1 for a single port ROM, 2 for dual port
#   $addr, $data    names of the port A address and data-out nets
#   $awidth         width of the address net
#   $dwidth         width of the data net
#   $vals           ref to the table contents (Math::BigInt values);
#                   the depth must be a power of 2 from 8 to 16384
#   $cmts           ref to per-entry comments (not used here)
#   $addrb, $datab  names of the port B nets (only for $ports == 2)
#
# As many RAMs as needed to cover $dwidth are instantiated side by side.
# Dies when the table depth or width cannot be mapped.
#
# NOTE(review): the instances connect their enable pins to a net named
# "ce", which the modules written by dlpf_coeff_module() never declare.
# Confirm where "ce" is expected to come from.
#
sub virtex2_bram {
    my ($fd, $ports, $addr, $data, $awidth, $dwidth,
        $vals, $cmts, $addrb, $datab) = @_;
    my $n = @$vals;
    my $dwidm1 = $dwidth-1;

    my %parts = (
        # depth => bram address bits :
        #          main data width of bram :
        #          parity data width of bram :
        #          name of single port part :
        #          name of dual port part
        8     => "9:32:4:RAMB16_S36:RAMB16_S36_S36",
        16    => "9:32:4:RAMB16_S36:RAMB16_S36_S36",
        32    => "9:32:4:RAMB16_S36:RAMB16_S36_S36",
        64    => "9:32:4:RAMB16_S36:RAMB16_S36_S36",
        128   => "9:32:4:RAMB16_S36:RAMB16_S36_S36",
        256   => "9:32:4:RAMB16_S36:RAMB16_S36_S36",
        512   => "9:32:4:RAMB16_S36:RAMB16_S36_S36",
        1024  => "10:16:2:RAMB16_S18:RAMB16_S18_S18",
        2048  => "11:8:1:RAMB16_S9:RAMB16_S9_S9",
        4096  => "12:4:0:RAMB16_S4:RAMB16_S4_S4",
        8192  => "13:2:0:RAMB16_S2:RAMB16_S2_S2",
        16384 => "14:1:0:RAMB16_S1:RAMB16_S1_S1",
    );

    die "coeff ROM depth ($n) is too deep for Xilinx" if $n > 16384;
    die "coeff ROM depth ($n) is not power of 2?"
        if (1 << log2($n)) != $n;
    die "coeff ROM size ($n) is too small for BRAM!" if ($n < 8);
    die "coeff ROM width ($dwidth) is too small!" if ($dwidth < 1);

    my ($braddr, $mwid, $pwid, $brname, $brnameb) = split ':', $parts{$n};
    $brname = $brnameb if $ports==2;
    my $nparts = ceil($dwidth/($mwid+$pwid));

    # Make two tables for the width in each memory to use in
    # the main part and the parity part.  Use the main parts
    # first and then go back and use parity parts.  The bit
    # assignments are different though, first bit range is
    # main part of first memory, second section is parity of
    # first memory, and so on.
    #
    my @muwid = ();
    my @puwid = ();
    my $bits = $dwidth;
    for my $npart (0 .. $nparts-1) {
        $muwid[$npart] = $bits;
        $muwid[$npart] = $mwid if $mwid < $bits;
        $bits -= $muwid[$npart];
    }
    for my $npart (0 .. $nparts-1) {
        $puwid[$npart] = $bits;
        $puwid[$npart] = $pwid if $pwid < $bits;
        $bits -= $puwid[$npart];
    }
    die "Botch: $bits bits left after width assignment" if $bits > 0;

    # Declare wires for memory output
    #
    pcode($fd, 4, "
        wire [${dwidm1}:0]  ${data};
    ");
    pcode($fd, 4, "
        wire [${dwidm1}:0]  ${datab};
    ") if ($ports==2);

    # Iterate through the blockrams and initialize them and then
    # instantiate them.
    #
    my $bitpos = 0;
    for my $npart (0 .. $nparts-1) {
        my $bname = sprintf "m%02d", $inst_num++;

        # The (max) 64 INIT parameters initialize the main part of the
        # blockram, the (max) 8 INITP parameters the extra parity bits.
        # Each parameter initializes 256 bits of the ram (regardless of
        # the ram configuration), organized in little-endian fashion.
        #
        my $mbitpos = $bitpos;
        my @init_m = _bram_init_strings($vals, $mbitpos, $muwid[$npart],
                                        $mwid, 64);
        $bitpos += $muwid[$npart];

        my $pbitpos = $bitpos;
        my @init_p = ();
        if ($puwid[$npart] > 0) {
            @init_p = _bram_init_strings($vals, $pbitpos, $puwid[$npart],
                                         $pwid, 8);
            $bitpos += $puwid[$npart];
        }

        # Initialization for simulation
        printf {$fd} "\n%s// synthesis translate_off\n", " " x 4;
        _print_defparams($fd, $bname, "INIT", \@init_m);
        if ($puwid[$npart] > 0) {
            print {$fd} "\n";
            _print_defparams($fd, $bname, "INITP", \@init_p);
        }
        printf {$fd} "%s// synthesis translate_on\n\n", " " x 4;

        # Initialization for synth, reusing the same strings
        _print_synth_attrs($fd, $bname, "INIT", \@init_m);
        _print_synth_attrs($fd, $bname, "INITP", \@init_p)
            if $puwid[$npart] > 0;

        # Make instance of the BRAM.  Complicated because it
        # handles both single and dual ported cases.
        #
        # Also handles awkward cases where full data width isn't
        # used and dummy wires need to be declared to make sure
        # the right portion of a partially used RAM connects
        # correctly.  bleh.
        #
        my $ram_addr = $addr;
        $ram_addr = sprintf("{ %d'b0, %s}", $braddr-$awidth, $addr)
            if $braddr > $awidth;
        my ($ram_do, $ram_pdo) = _bram_port_nets(
            $fd, $data, $bname, "",
            $mbitpos, $muwid[$npart], $mwid,
            $pbitpos, $puwid[$npart], $pwid);

        if ($ports == 2) {
            my $ram_addrb = $addrb;
            $ram_addrb = sprintf("{ %d'b0, %s}", $braddr-$awidth, $addrb)
                if $braddr > $awidth;
            my ($ram_dob, $ram_pdob) = _bram_port_nets(
                $fd, $datab, $bname, "b",
                $mbitpos, $muwid[$npart], $mwid,
                $pbitpos, $puwid[$npart], $pwid);

            pcode($fd, 12, "
                ${brname} ${bname} (
                    .CLKA   ( ck ),
                    .CLKB   ( ck ),
                    .ADDRA  ( ${ram_addr} ),
                    .ADDRB  ( ${ram_addrb} ),
                    .DIA    ( ${mwid}'b0 ),
                    .DIB    ( ${mwid}'b0 ),
                    .DOA    ( ${ram_do} ),
                    .DOB    ( ${ram_dob} ),
            ");
            pcode($fd, 12, "
                    .DIPA   ( ${pwid}'b0 ),
                    .DIPB   ( ${pwid}'b0 ),
                    .DOPA   ( ${ram_pdo} ),
                    .DOPB   ( ${ram_pdob} ),
            ") if $pwid>0;
            pcode($fd, 12, "
                    .ENA    ( ce ),
                    .WEA    ( 1'b0 ),
                    .SSRA   ( 1'b0 ),
                    .ENB    ( ce ),
                    .WEB    ( 1'b0 ),
                    .SSRB   ( 1'b0 )
                );
            ");
        }
        else {
            pcode($fd, 12, "
                ${brname} ${bname} (
                    .CLK    ( ck ),
                    .ADDR   ( ${ram_addr} ),
                    .DI     ( ${mwid}'b0 ),
                    .DO     ( ${ram_do} ),
            ");
            pcode($fd, 12, "
                    .DIP    ( ${pwid}'b0 ),
                    .DOP    ( ${ram_pdo} ),
            ") if $pwid>0;
            pcode($fd, 12, "
                    .EN     ( ce ),
                    .WE     ( 1'b0 ),
                    .SSR    ( 1'b0 )
                );
            ");
        }
    }
    return;
}

# dlpf_coeff_rom($fd, $addr, $data, $awidth, $dwidth, $vals, $cmts)
#
# Emit on $fd a registered ROM that drives net $data ($dwidth bits) from
# address net $addr ($awidth bits).  @$vals holds the contents as
# Math::BigInt values and @$cmts one comment per entry.  Tables deeper
# than 32 entries are built from block RAM primitives when targeting
# virtex2; everything else becomes a behavioral case statement.
#
sub dlpf_coeff_rom {
    my ($fd, $addr, $data, $awidth, $dwidth, $vals, $cmts) = @_;
    my $n = @$vals;
    my $dwidm1 = $dwidth-1;

    if ($opt_imp eq "virtex2" && $n > 32) {
        virtex2_bram($fd, 1, $addr, $data, $awidth, $dwidth, $vals, $cmts);
    }
    else {
        # behavioral implementation, also synthesizes well for xilinx
        # if rom width is power of 2, but sadly XST synth is inefficient
        # if you want a dual port rom or width is something like 9, 18,
        # or 36.  For these cases primitives need to be instantiated.
        #
        my $type = "DISTRIBUTED";
        my $attr = "";
        $type = "BLOCK" if $n>32;
        $attr = "synthesis attribute ROM_STYLE of ${data} is ${type}"
            if ($opt_imp eq "virtex2");

        pcode($fd, 12, "
                // ${attr}
                reg [${dwidm1}:0]   ${data};

                always @(posedge ck) begin
                    case (${addr})
        ");
        for my $i (0 .. $n-1) {
            my $str = $$vals[$i]->as_hex();
            $str =~ s/^0x//;
            printf {$fd} "%s// %s\n", " " x 12, $$cmts[$i];
            printf {$fd} "%s${awidth}'d%d : ${data} <= ${dwidth}'h${str};\n",
                " " x 12, $i;
        }
        pcode($fd, 12, "
                    endcase
                end
        ");
    }
    return;
}

# _window_weight($window, $x)
#
# Weight of windowing function $window at normalized distance $x from
# the centre of the filter (0 at the centre, 1 at either edge).  Dies
# for a window name that has no weighting formula.
#
sub _window_weight {
    my ($window, $x) = @_;

    # Triangle window
    return 1.0 - $x if $window eq "tri" || $window eq "bartlett";

    # Raised-cosine family
    my $phase = $x * pi;
    return 0.5  + 0.5*cos($phase)  if $window eq "hanning";
    return 0.54 + 0.46*cos($phase) if $window eq "hamming";
    return 0.42 + 0.5*cos($phase) + 0.08*cos(2.0*$phase)
        if $window eq "blackman";

    die "Botch: failed to handle window type ${window}";
}

# _quantize($v, $width)
#
# Convert real coefficient $v (nominally -1 .. 1) to a $width-bit two's
# complement integer, returned as a non-negative bit pattern.  Positive
# overflow saturates at the largest positive code.
#
# NOTE(review): int($x + 0.5) truncates toward zero, so negative values
# round toward +infinity rather than to nearest.  Left as is so that
# regenerated tables stay bit-identical; consider floor($x + 0.5).
#
sub _quantize {
    my ($v, $width) = @_;
    my $highbit = 1 << ($width-1);
    my $mask    = (1 << $width) - 1;

    my $hex = int(($v*$highbit) + 0.5);
    $hex = ($highbit-1) if $hex >= $highbit;
    return $hex & $mask;
}

# dlpf_coeff_vals($start, $stop, $width, $n, $fwid)
#
# Compute the contents of one coefficient ROM.
#
#   $start  position (x pi) in the sinc where this ROM's slice begins
#   $stop   position (x pi) where it ends (not used in the computation)
#   $width  coefficient width in bits
#   $n      number of entries in the ROM (the decimation)
#   $fwid   total width of the filter (x pi)
#
# The sinc is evaluated over the whole filter ($n * $fwid points), the
# window selected by --window is applied, and the mean DC gain of the
# whole filter is stored in $mean_gain.  When --idx is non-zero the
# complete quantized filter is also written to <odir>/<prefix>_coeff.
# Finally the $n points belonging to this ROM are cut out and quantized.
#
# Returns (\@vals, \@cmts): the entries as Math::BigInt bit patterns and
# a descriptive comment for each.
#
sub dlpf_coeff_vals {
    my ($start, $stop, $width, $n, $fwid) = @_;

    my @vals = ();
    my @cmts = ();
    my $highbit = 1 << ($width-1);
    my $mask    = (1 << $width) - 1;

    if ($opt_window eq "fft") {
        # This is a dummy for testing that puts 1's in
        # the last coefficient rom and 0's in the other ROMs
        # for a unity transfer function from the filter.
        #
        my $ones = ($start == $fwid/2);
        my $hex  = $ones ? (($highbit-1) & $mask) : 0;
        my $cmt  = $ones ? "1" : "0";
        for my $idx (0 .. $n-1) {
            push @vals, Math::BigInt->new($hex);
            push @cmts, $cmt;
        }
        return (\@vals, \@cmts);
    }

    # Generate floating vals for sin(x)/x.  Make values for the entire
    # window, not just this coefficient ROM.  The sample positions are
    # offset by half a step so the function is even, which lets the
    # filter take advantage of symmetry and halve the multipliers.
    #
    my $ntaps = $n * $fwid;
    for my $idx (0 .. $ntaps-1) {
        my $t = $fwid/2 - ($idx+0.5)/$n;
        my $v = 1.0;
        $v = 1.0 * $opt_narrow * sin($t * pi / $opt_narrow) / ($t * pi)
            if ($t != 0);
        push @vals, $v;
        push @cmts, sprintf("sinc(%.3f * pi / %.3f )", $t, $opt_narrow);
    }

    # Apply the windowing function and finish the comments
    my $windowed = !($opt_window eq "none" || $opt_window eq "rect");
    for my $idx (0 .. $ntaps-1) {
        if ($windowed) {
            my $t = $fwid/2 - ($idx+0.5)/$n;
            my $m = _window_weight($opt_window, abs($t / ($fwid/2)));
            $vals[$idx] *= $m;
            $cmts[$idx] = sprintf("%s * %.3f = %.3f",
                                  $cmts[$idx], $m, $vals[$idx]);
        }
        else {
            $cmts[$idx] = sprintf("%s = %.4f", $cmts[$idx], $vals[$idx]);
        }
    }

    # Calculate the DC gain: the taps that are summed for one output
    # phase are $n apart, and the mean is taken over all $n phases.
    # The values are deliberately not rescaled for unity peak gain.
    #
    my $gain_tot = 0;
    for my $pos (0 .. $n-1) {
        my $gain_sum = 0.0;
        for (my $i = 0; $i < $fwid*$n; $i += $n) {
            $gain_sum += $vals[$i+$pos];
        }
        $gain_tot += $gain_sum;
    }
    $mean_gain = $gain_tot / $n;
    $g_gain = "1.000";

    # Write out ascii coefficient table for tweaky people
    # to look at
    #
    if ($opt_idx) {
        my $fnc = "${opt_odir}/${opt_prefix}_coeff";
        open my $fdc, '>', $fnc
            or die "Cannot open coefficient file $fnc\n    $!";
        print {$fdc} "# gain:           $mean_gain\n";
        print {$fdc} "# decimation:     ${n}\n";
        print {$fdc} "# width:          ${fwid}\n";
        print {$fdc} "# narrow:         ${opt_narrow}\n";
        print {$fdc} "# window:         ${opt_window}\n";
        print {$fdc} "# coefficients:   ${width} bits\n";
        print {$fdc} "# \n";
        for my $v (@vals) {
            # Convert to the bit pattern of correct width, then back
            # to the quantized real value it represents
            my $hex = _quantize($v, $width);
            $hex = $hex - (1 << $width) if $hex & $highbit;
            printf {$fdc} "%.14f\n", $hex / $highbit;
        }
        close $fdc
            or die "Cannot write coefficient file $fnc\n    $!";
    }

    # Cut out the part of the filter that belongs to this ROM
    my $si = ($fwid/2 - $start) * $n;
    @vals = @vals[$si .. $si+$n-1];
    @cmts = @cmts[$si .. $si+$n-1];

    # Convert floating vals to hex BigInt of appropriate width
    for my $idx (0 .. $n-1) {
        $vals[$idx] = Math::BigInt->new(_quantize($vals[$idx], $width));
    }

    return (\@vals, \@cmts);
}

# dlpf_coeff_module()
#
# Write the coefficient module: <odir>/<prefix>_coeff_<idx>.v (a ROM)
# for --dec, or <odir>/<prefix>_coeff.v (a RAM written through diag_we /
# diag_data) for --vdec.  With --gain only the mean gain is printed and
# the program exits.  A non-empty existing file is left untouched and 0
# is returned; otherwise the file is written and 1 is returned.
#
sub dlpf_coeff_module {
    my $dec;
    $dec = $opt_dec  if $opt_dec;
    $dec = $opt_vdec if $opt_vdec;
    die "Must specify one of --dec=n or --vdec=n" unless $dec;

    my $decm1 = $dec-1;
    my $tad   = log2($dec);
    my $tadm1 = $tad - 1;
    my $widm1 = $opt_width - 1;

    my $vals;
    my $cmts;
    ($vals, $cmts) = dlpf_coeff_vals($opt_start, $opt_stop, $opt_width,
                                     $opt_dec, $opt_fwid) if $opt_dec;

    if ($opt_gain) {
        printf("%f\n", $mean_gain);
        exit(0);
    }

    my $fn = "${opt_odir}/${opt_prefix}_coeff_${opt_idx}.v";
    $fn = "${opt_odir}/${opt_prefix}_coeff.v" if $opt_vdec;

    if (-s $fn) {
        print "    $fn exists, mkdlpf_coeff not creating\n";
        return 0;
    }
    print "    Creating $fn\n";
    open my $fd, '>', $fn
        or die "mkdlpf_coeff cannot create file $fn.\n    $!";

    pcode($fd, 8, "
        // LPF coefficient table
        //
        //      Decimation is ${dec}
        //      Filter width is ${opt_fwid} * pi
        //      Start position is ${opt_start} * pi
        //      Stop position is ${opt_stop} * pi
        //      Window function is ${opt_window}
        //      DC gain scaled by ${g_gain}
        //
        //      3 pipeline delays from sync_i to output
        //
        // Generated by mkdlpf_coeff
        //
        module ${opt_prefix}_coeff_${opt_idx} (
            ck,
            addr,
    ") if $opt_dec;

    pcode($fd, 8, "
        // LPF coefficient ram
        //
        //      Max decimation is ${dec}
        //      3 pipeline delays from sync_i to output
        //
        // Generated by mkdlpf_coeff
        //
        module ${opt_prefix}_coeff (
            ck,
            addr,
            diag_we,
            diag_data,
    ") if $opt_vdec;

    pcode($fd, 8, "
            c
        );

            input               ck;
            input  [${tadm1}:0] addr;
    ");
    pcode($fd, 8, "
            input               diag_we;
            input  [${widm1}:0] diag_data;
    ") if $opt_vdec;
    pcode($fd, 8, "
            output [${widm1}:0] c;
            reg    [${widm1}:0] c;

    ");

    dlpf_coeff_rom($fd, "addr", "c_p", $tad, $opt_width, $vals, $cmts)
        if $opt_dec;

    pcode($fd, 8, "
            // Read/write memory for filter coefficients
            reg [${widm1}:0] mem [0:${decm1}];
            reg [${widm1}:0] c_p;

            always @(posedge ck) begin
                if (diag_we)
                    mem[addr] <= diag_data;
                c_p <= mem[addr];
            end
    ") if $opt_vdec;

    pcode($fd, 8, "

            always @(posedge ck)
                c <= c_p;
        endmodule
    ");

    close $fd
        or die "mkdlpf_coeff cannot write file $fn.\n    $!";
    return 1;
}

# dlpf_coeff_table()
#
# Handle --table=s: write the quantized coefficients of the selected ROM
# to the named file as hex, one value per line, then exit.  With --gain
# the mean gain is printed instead and the table file is not touched.
#
sub dlpf_coeff_table {
    my $vals;
    my $cmts;
    ($vals, $cmts) = dlpf_coeff_vals($opt_start, $opt_stop, $opt_width,
                                     $opt_dec, $opt_fwid) if $opt_dec;

    if ($opt_gain) {
        printf("%f\n", $mean_gain);
        exit(0);
    }

    open my $fd, '>', $opt_table
        or die "Cannot open output file $opt_table: $!";

    # Without --dec there are no values and the table stays empty.
    my $n = $vals ? scalar @$vals : 0;
    for my $i (0 .. $n-1) {
        my $str = $$vals[$i]->as_hex();
        $str =~ s/^0x//;
        # print {$fd} "${str}       $$cmts[$i]\n";
        print {$fd} "${str}\n";
    }
    close $fd
        or die "Cannot write output file $opt_table: $!";
    exit 0;
}

die "Cannot specify both --dec=n and --vdec=b" if $opt_dec && $opt_vdec;

dlpf_coeff_table() if $opt_table;
dlpf_coeff_module();