LOCUS       KY379508                6587 bp    DNA     linear   BCT 24-APR-2017
DEFINITION  Escherichia coli strain O62 O antigen gene cluster, complete
            sequence.
ACCESSION   KY379508
VERSION     KY379508.1
KEYWORDS    .
SOURCE      Escherichia coli
  ORGANISM  Escherichia coli
            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales;
            Enterobacteriaceae; Escherichia.
REFERENCE   1  (bases 1 to 6587)
  AUTHORS   Hou,X., Perepelov,A.V., Guo,X., Senchenkova,S.N., Shashkov,A.S.,
            Liu,B., Knirel,Y.A. and Wang,L.
  TITLE     A gene cluster at an unusual chromosomal location responsible for
            the novel O-antigen synthesis in Escherichia coli O62 by the ABC
            transporter-dependent pathway
  JOURNAL   Glycobiology (2017) In press
   PUBMED   28402541
  REMARK    Publication Status: Available-Online prior to print
REFERENCE   2  (bases 1 to 6587)
  AUTHORS   Hou,X.
  TITLE     Direct Submission
  JOURNAL   Submitted (21-DEC-2016) Microbiology, TEDA Institute of Biological
            Sciences and Biotechnology, Nankai University, 23 Hongda Street,
            Tianjin 300457, China
FEATURES             Location/Qualifiers
     source          1..6587
                     /organism="Escherichia coli"
                     /mol_type="genomic DNA"
                     /strain="O62"
                     /db_xref="taxon:562"
     misc_feature    1..6587
                     /note="O antigen gene cluster"
     gene            1..768
                     /gene="wzm"
     CDS             1..768
                     /gene="wzm"
                     /note="Wzm"
                     /codon_start=1
                     /transl_table=11
                     /product="ABC transporter permease"
                     /protein_id="ARJ35760.1"
                     /translation="MKFNLGYLFDLLIVITNKDLKVRYKSSIFGYLWSIANPLLFAMI
                     YYFIFKMVMRVQIPNYTVFLITGLFPWQWFASSATNSLFSFISNAQIIKKTVFPRSVI
                     PLSNVMMEGLHFLCTIPVIAVFLYIYGMSPSLSWLWGIPVLAVGQIIFTFGFALILST
                     LNLFFRDLERFVALGIMLMFYCTPILYNSDMIPKQFSWIIDYNPLASMIMAWRDLLMN
                     GNLDYMHITYLYSSGIVILMLGVWVFNKLKYRFAEIL"
     gene            768..1508
                     /gene="wzt"
     CDS             768..1508
                     /gene="wzt"
                     /note="Wzt"
                     /codon_start=1
                     /transl_table=11
                     /product="sugar ABC transporter ATP-binding protein"
                     /protein_id="ARJ35761.1"
                     /translation="MTTVIEFCNVTKEYPLYHHIGSGIKDLFFHPRRAFNLLKGRKYL
                     AIENVSFSVNKGESVALIGRNGAGKSTTLGLVAGVLKPSKGEVKVVGRVASMLELGGG
                     FHPELTGRENIKLNATLLGLRRAEVNKRLDKIIEFSELGEFIDEPIRVYSSGMLAKLG
                     FSVISQVDPDVLIIDEVLAVGDIAFQRKCIETINEFRKRGVTILFVSHSMNDVMKICD
                     KVVWVENHRLKAIGEASSIVDEYVASMA"
     gene            1508..2671
                     /gene="glf"
     CDS             1508..2671
                     /gene="glf"
                     /note="Glf"
                     /codon_start=1
                     /transl_table=11
                     /product="UDP-galactopyranose mutase"
                     /protein_id="ARJ35762.1"
                     /translation="MEKRMQKNVLIVGAGFSGAVIGRLLAEAGHKVTIIDSRPHVAGN
                     CYSERDTETDVMVHTYGPHIFHTDNAEVWEYIKQYTEMMPYVNRVKTTVNGQVFSLPI
                     NLHTINQFFHTTCSPTEARKLIDEKSDKTIEEPATFEEQALRFVGKELYEAFFKGYTI
                     KQWGMSPSELPASILKRLPVRFNYDDNYFNHKYQGMPKDGYTVIVDKILAHENISVSL
                     NTRFNEQSRNEYDHVFYSGALDGFYNYDMGRLGYRTLDFDAFRTEGDYQGCAVMNYGE
                     QEVPYTRITEHKYFAPWENHDKSICYREFSRSCEPEDIPYYPIRQVGEMEMLQNYLDR
                     AEQEANITFVGRLGTYRYLDMDVTIAEALETGRTYLKSLEQNSVMPVFTVKVR"
     gene            2668..3558
                     /gene="wfdJ"
     CDS             2668..3558
                     /gene="wfdJ"
                     /note="WfdJ"
                     /codon_start=1
                     /transl_table=11
                     /product="GT2 family glycosyl transferase"
                     /protein_id="ARJ35765.1"
                     /translation="MNVVALIVTYNRLDKLKTTIEATLALPFQYIVVVNNASTDDTQS
                     YLDSFCDERIKVLHREVNSGGAGGFKYGARWITENLSSDWVLFYDDDAYPHTDFYDNL
                     IRWQVQQGTVYCCKVLDTENNLCKMNMPWKKYPHGILENIAYLRQPEKFIPNGQSKEN
                     VSSVSFVGMLIDIKLLSSSIDYIYDDLFIYFDDVYFSYNLRLNAIPIIYVPELIIIHD
                     VDTRTMPMSYWKLYYLVRNLILSRVLFPKKAPFSRIDILLRVLKYFLIGIKNPDPKKA
                     IGGIIKGISDGVKGRRGQGI"
     gene            3564..4700
                     /gene="wbbO"
     CDS             3564..4700
                     /gene="wbbO"
                     /note="WbbO"
                     /codon_start=1
                     /transl_table=11
                     /product="galactosyl transferase"
                     /protein_id="ARJ35763.1"
                     /translation="MKRKLCYFVNSAWYFELHWVDRGVAAIKSGYEVYVIANFSDAAT
                     VQRLSSLGFHCINTYIDEKNINPLVFIKDFLVAKKILDKVKPDILHCITIKPGVISCL
                     WAKYNNVGLVYSFVGLGRVFESNRIIFRTLKVIVTKLYRYLFNNINYKLVFEHRNDQH
                     KMLSLLHLPKNNSIVIDGAGIDISYFNFKTPKKDSPATIFFAGRMLKSKGLNDLIQAK
                     NLLKQRGIDCILKVAGILVDNDEDAISRKQIQQWQDQDDIEWLGTRTDIKELLEAAHV
                     VALPSLYPEGIPRILLEAGAVGRPCVVYDNDGCNALIVDNYNGFVVERSNVSKLADKI
                     SLLITNPDERDRMGKNARQNIEERFTSTLVIEQTLKVYQEIYNK"
     CDS             4836..6587
                     /codon_start=1
                     /transl_table=11
                     /product="glycosyltransferase"
                     /protein_id="ARJ35764.1"
                     /translation="MRLLVQDIIVPSIGLCTDESLYYHAGERVFADLNSKEIHVPKNA
                     VISFDSFFNSLSVKKWKENTTIDNLLVTYKVRGKGCLYINHFNDSKDEVLEQIIIDND
                     EGEVQFSLSGLHTGYIYLRWRAFEESVIESFSFYTSSPCKDVKLGLVITTFNREEQVV
                     KSVKRIRDELLTDPRYVNKIQLYLINNGNDVDGIDYPEVNIVKNKNLGGAGGFSRGLC
                     ELTDEGTFTHCIFMDDDAACEIDSIRRTFALLSLSNDDNLAVSAAMLYEERPRIVHEA
                     GAVVFCDGIFHRPAKIGLNVSERSGIISFDEEEALGYGAWWFYAFNIKNTKYYPFPFF
                     VRGDDMLFGLMNDNHKIITLNGIASWQMDFNRKLNPLVFYLSFRAMFMSSCRKVTWRN
                     RIMLSAFFMREVIILSMANRYESAQAVVQAYKDCMSGIDFWENNVDCAEVRKKINQLT
                     INERFNIPSSDLLKIETDSFLYIKENKIHKALRYLTLNGHLIPKYFFNKRARLIPETH
                     KNPTNMVFMRNDVYYVSQMNGSAMKLSHSKEKFFRVVLNASLLSFNTLFSLKKRLESY
                     QSEITRLTTKDFWKTKF"
ORIGIN      
        1 atgaagttta atttaggata tttatttgat ttacttattg taattacgaa taaagattta
       61 aaagtgcgtt acaaaagtag catttttgga tatttatggt ctattgcaaa ccctttatta
      121 tttgcgatga tttactattt tatatttaaa atggtaatga gggttcaaat tccaaattat
      181 accgttttcc tcattaccgg tttgttccca tggcaatggt ttgcaagttc ggcaaccaat
      241 tcattgtttt cttttatatc aaatgcacag attattaaga agacagtatt cccgcggtca
      301 gttattcctc taagtaatgt gatgatggaa gggttacatt ttttatgtac aattcctgtg
      361 attgcagttt ttctttatat ttatgggatg tcaccctctc tgagttggct ttggggtatt
      421 cctgttttgg ctgtcggaca gattattttt acatttggat ttgcactgat cttatctaca
      481 ttaaatttgt ttttccgtga cctggagcgt tttgtcgcct taggtattat gctgatgttc
      541 tattgtactc cgatactcta taattctgac atgattccga agcagtttag ctggattatt
      601 gattataacc ctctggcaag catgatcatg gcctggcggg atctacttat gaatggtaat
      661 ctggattaca tgcatattac ttatctatac tcttctggca tcgttatttt gatgctcgga
      721 gtgtgggtct ttaataaatt aaaataccga tttgcagaga ttttgtgatg acaaccgtta
      781 ttgaattttg caatgtaacc aaagagtacc ccctttatca tcatattggc tctggcatta
      841 aagatttatt ttttcaccct cgtcgggcat ttaacttact taaagggcgt aagtatctgg
      901 cgattgaaaa cgtatcattc agtgtgaata aaggtgagtc tgtcgcattg attggtagga
      961 atggagccgg taaaagtact acccttggtc tcgttgctgg cgtgctaaag ccaagcaagg
     1021 gtgaagtaaa agttgttggg cgagtggctt cgatgcttga attgggaggc ggttttcatc
     1081 cggaacttac tggccgtgaa aatatcaaat taaatgccac gttgttgggg ttgcgtagag
     1141 cagaagttaa caagaggctc gataaaatta tcgagttttc agagttaggt gagttcattg
     1201 atgagcctat acgcgtatac tccagtggta tgctggcgaa actgggtttt tctgttataa
     1261 gtcaggttga cccggatgta ttgattattg acgaagttct ggcggttggt gatattgcat
     1321 ttcaacgtaa atgtattgaa acaatcaatg aatttagaaa acgtggggta acgatcctgt
     1381 ttgttagcca cagcatgaac gacgttatga agatttgcga taaagtagtg tgggttgaaa
     1441 atcaccggtt gaaagctatc ggtgaggcaa gctcgattgt tgatgaatat gtagcctcga
     1501 tggcttgatg gaaaaacgaa tgcagaagaa tgttcttatt gtaggcgcag gattttcggg
     1561 cgcggttatt ggtcgcttat tggctgaagc tggtcacaag gtcacaatta ttgacagccg
     1621 gccacatgtc gctggtaact gctatagtga gcgggatact gaaaccgatg ttatggttca
     1681 tacctatggc cctcatattt tccatactga caatgctgaa gtatgggaat acatcaagca
     1741 atacaccgaa atgatgcctt atgtgaaccg tgtgaaaaca acggtaaatg ggcaagtatt
     1801 ttccttacca atcaacttac atacaataaa ccaattcttc cacaccactt gttctccaac
     1861 tgaagctcgc aaattgattg atgagaaaag tgataaaaca attgaagagc cagcaacatt
     1921 tgaagagcag gcgttgcgtt ttgtgggcaa agaattatat gaagcctttt ttaaaggtta
     1981 caccataaaa cagtggggaa tgtcaccatc cgaactccct gcttcaattc ttaagcgctt
     2041 gcctgtccgc tttaattatg atgataacta ctttaatcat aaatatcagg gcatgccaaa
     2101 agacggctat acggtgatcg tagataagat actggctcac gagaatattt cagtgtctct
     2161 aaatacacgg tttaatgaac agtcccgcaa tgaatatgac catgtctttt atagtggtgc
     2221 tttggatggt ttctataact atgatatggg gcgcttgggt tatcgtacgt tagattttga
     2281 tgcattccgc accgaaggtg attatcaagg ctgtgcagtc atgaattatg gtgagcaaga
     2341 ggttccttat actcgtataa ccgagcataa atactttgct ccgtgggaaa atcacgataa
     2401 atcaatctgt tatcgtgagt tcagccgctc gtgtgaacct gaagatatcc catactaccc
     2461 aattcgacag gtaggggaga tggaaatgtt acaaaattat ctcgatcgtg ctgaacaaga
     2521 agcaaacatt acttttgttg gtcgtctggg tacttaccgt tatctggata tggatgtgac
     2581 aattgccgaa gctctggaaa ctggccgtac ttatcttaag tctttagagc agaattcagt
     2641 aatgcctgta tttacggtca aagtcagatg aatgttgttg ctctaatcgt cacttataat
     2701 cggttagata agttaaaaac aacaattgag gcaacgctgg cgttgccttt tcaatatata
     2761 gttgtggtta acaatgcgtc aactgatgat acacaatcct atcttgattc tttttgtgat
     2821 gaacggataa aagttttaca tcgcgaggta aacagtggtg gggctggcgg ttttaaatac
     2881 ggagcgcgat ggataaccga aaacctgagc agtgattggg ttctttttta cgatgatgat
     2941 gcttacccac atactgattt ttatgataat ttaattagat ggcaagtgca gcaaggaacg
     3001 gtgtactgct gtaaggtatt agatacagaa aataatcttt gcaagatgaa tatgccatgg
     3061 aaaaaatatc cacatgggat tcttgagaac attgcctatc tacgacagcc tgaaaaattc
     3121 attccgaatg gtcaatccaa agaaaacgta tcgagtgtat catttgtagg aatgctaatc
     3181 gatattaaac tgctaagctc gtcaattgat tatatttatg atgatttatt tatctatttt
     3241 gacgatgttt attttagcta taacttaagg ctcaatgcta taccaataat atatgttcct
     3301 gagttgatca taatacatga tgttgatacc aggactatgc ctatgtccta ttggaaattg
     3361 tattatttag taagaaatct aattctttcg agagtactgt ttccaaaaaa agcgccattc
     3421 tcgaggattg atattttatt aagagtgtta aagtatttct tgataggaat caaaaatcct
     3481 gaccctaaaa aagccattgg tggtataata aaagggatta gtgatggagt taaaggacgc
     3541 agaggtcaag gtatttaact tatatgaaaa gaaaactttg ctactttgtt aactctgcct
     3601 ggtatttcga actccattgg gttgatagag gggttgcagc aattaaaagt ggttatgaag
     3661 tatatgtgat tgctaacttt tctgatgctg caacagttca aagattgagt tctttgggct
     3721 ttcactgcat caatacttat atcgatgaga aaaatattaa tccattagtt tttataaaag
     3781 atttcttggt agctaaaaaa attcttgata aagtaaaacc agacatatta cattgtataa
     3841 caataaagcc tggggtcatc agttgcctat gggctaaata taataatgtt ggtttggttt
     3901 atagttttgt tggcttaggg cgtgtattcg aaagtaatag aattattttt agaactttaa
     3961 aagtaatagt tacaaaatta tatcgctatc ttttcaataa tataaattat aaattggtgt
     4021 ttgaacaccg aaacgatcag cataaaatgc tatcgttgct gcacttacct aaaaataaca
     4081 gcattgtaat tgatggtgca ggcatagata tatcttattt taattttaaa accccaaaaa
     4141 aagatagccc ggcaactatt ttttttgctg gacgaatgct taaaagcaaa ggtttgaatg
     4201 atctgatcca agcaaaaaat ttattaaagc aacggggaat cgattgtatt ctaaaagtgg
     4261 ctggaatctt ggtagataat gatgaagatg ccattagtcg taaacaaatt cagcaatggc
     4321 aagatcaaga tgatatcgaa tggttgggaa ctcgaacaga tatcaaagaa ttgcttgaag
     4381 cggcgcatgt tgttgcatta ccatcacttt acccagaagg aatcccacga atattattag
     4441 aagccggggc cgtaggtcgg ccttgtgtcg tctatgataa tgatggttgc aatgctttga
     4501 tagtagacaa ctataatgga ttcgttgtag agcgaagcaa tgtcagcaaa ctggctgata
     4561 aaattagttt attgatcaca aatcccgatg agcgagatcg catggggaaa aatgccaggc
     4621 aaaatatcga agaacgtttt acgtcaacat tggttattga gcaaacgctc aaagtctatc
     4681 aggaaatata taataaataa ttacttttcg atatttaata ttgtttatgt atttagaagt
     4741 gaaacatatt taacaccatt atataattta ttctagaatg atttcattgt ttatattaat
     4801 ttatgaaaca atgcaaagag ataataagag gcatcatgag attattggtt caggatatta
     4861 tagttcctag cattgggttg tgtactgatg agtcgttata ttaccatgca ggagagcggg
     4921 tttttgctga cttaaacagt aaagaaattc acgttcctaa aaatgcggtg attagttttg
     4981 attccttctt taactctctt agtgtgaaga aatggaaaga aaacacaacc attgataatc
     5041 ttttagtgac atataaagtt cgtgggaaag gttgccttta catcaatcac tttaacgata
     5101 gtaaagatga agttttagag caaattatta tcgataatga tgaaggggaa gtgcaatttt
     5161 cactgagtgg attacatact ggctatatct acttaagatg gcgagcattt gaagaaagcg
     5221 taattgaatc cttttcattt tatacctctt caccttgtaa agacgtcaaa ctcgggttag
     5281 ttataacaac gtttaatcgt gaagaacaag ttgttaagtc ggtcaaacgt attcgtgatg
     5341 aattgttaac tgatccacgt tatgtaaata aaatacaact atatttaatt aacaatggta
     5401 atgatgttga tggtatcgat tatcctgaag taaatatagt caaaaataaa aaccttggcg
     5461 gtgccggtgg gttttctcga ggcttgtgtg agttgacgga cgaaggcact tttacacatt
     5521 gtatatttat ggatgatgat gcagcatgtg aaatcgattc aattcgacgt acatttgcac
     5581 ttctttctct gtctaatgat gataatctgg ctgtatcggc tgctatgctc tatgaagaaa
     5641 ggccaagaat tgtgcatgaa gccggtgctg tagttttctg cgatggtatt tttcatcgcc
     5701 cggcaaagat aggcttaaat gtctctgagc gctcaggtat tattagtttt gatgaagaag
     5761 aggctttggg atatggagcc tggtggtttt atgcgtttaa tataaagaat actaaatact
     5821 atccattccc tttctttgtc cggggtgatg atatgctatt tgggctaatg aacgataatc
     5881 ataagattat cactctcaat ggcattgcat cctggcagat ggatttcaat agaaaactga
     5941 acccgctagt cttttatctt agcttccgag ctatgtttat gagttcctgc cgaaaagtta
     6001 cctggcgtaa tagaataatg ctctcagcat ttttcatgcg tgaagttata attctatcga
     6061 tggctaatcg ctatgaaagt gcgcaggctg ttgtacaagc ttataaagac tgcatgagtg
     6121 ggatagattt ttgggaaaac aatgtagatt gcgctgaagt acgaaaaaag ataaatcagc
     6181 ttactattaa tgagagattt aatatacctt caagtgattt attaaaaatt gaaacagatt
     6241 cttttttgta tataaaagaa aataagatac ataaggcctt aagatacttg acattaaatg
     6301 gccatcttat tcctaaatat ttctttaata aaagagctcg tttaatacct gaaacacaca
     6361 aaaacccgac taatatggtg tttatgcgaa atgatgttta ctatgtttcg caaatgaatg
     6421 gttcagcaat gaaattgtcg cattcaaaag agaaattttt ccgtgttgtt ttaaatgcaa
     6481 gtttacttag ctttaacacg ttattttcct taaagaaacg acttgagtct tatcagtctg
     6541 aaattacccg gttgacgact aaagattttt ggaaaactaa gttttaa
//